1 /* Output routines for GCC for Hitachi Super-H.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* Contributed by Steve Chamberlain (sac@cygnus.com).
22 Improved by Jim Wilson (wilson@cygnus.com). */
31 #include "insn-flags.h"
34 #include "hard-reg-set.h"
36 #include "insn-attr.h"
/* Exported copy of the insn code for the indirect_jump_scratch pattern,
   so code outside this file can test INSN_CODE against it (see
   output_far_jump below).  */
38 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
/* Register-pair word index of the most / least significant word of a
   double-word value; the two swap on little-endian targets.  */
40 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
41 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
43 /* ??? The pragma interrupt support will not work for SH3. */
44 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
45 output code for the next function appropriate for an interrupt handler. */
/* NOTE(review): the declarations described by the surrounding comments
   (pragma_interrupt, trap_exit, sp_switch) are not visible in this
   listing -- lines appear elided; confirm against the full sh.c.  */
48 /* This is set by the trap_exit attribute for functions. It specifies
49 a trap number to be used in a trapa instruction at function exit
50 (instead of an rte instruction). */
53 /* This is used by the sp_switch attribute for functions. It specifies
54 a variable holding the address of the stack the interrupt function
55 should switch to/from at entry/exit. */
58 /* This is set by #pragma trapa, and is similar to the above, except that
59 the compiler doesn't emit code to preserve all registers. */
60 static int pragma_trapa;
62 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
63 which has a separate set of low regs for User and Supervisor modes.
64 This should only be used for the lowest level of interrupts. Higher levels
65 of interrupts must save the registers in case they themselves are
67 int pragma_nosave_low_regs;
69 /* This is used for communication between SETUP_INCOMING_VARARGS and
70 sh_expand_prologue. */
71 int current_function_anonymous_args;
73 /* Global variables from toplev.c and final.c that are used within, but
74 not declared in any header file. */
75 extern char *version_string;
76 extern int *insn_addresses;
78 /* Global variables for machine-dependent things. */
80 /* Which cpu are we scheduling for. */
81 enum processor_type sh_cpu;
83 /* Saved operands from the last compare to use when we generate an scc
/* NOTE(review): the sh_compare_op0/sh_compare_op1 declarations the
   comment above refers to are elided here, but both names are used
   freely by prepare_scc_operands/from_compare below.  */
89 enum machine_mode sh_addr_diff_vec_mode;
91 /* Provides the class number of the smallest class containing
/* Maps each hard register number to its smallest containing register
   class; indexed by hard regno.  NOTE(review): the opening/closing
   braces of both initializers below appear elided from this listing.  */
94 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
96 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
97 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
98 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
99 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
100 GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
101 MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
102 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
103 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
104 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
105 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
108 /* Provide reg_class from a letter such as appears in the machine
/* Maps constraint letters 'a'..'z' (index 0 == 'a') to register
   classes for the md file's register constraints.  */
111 enum reg_class reg_class_from_letter[] =
113 /* a */ NO_REGS, /* b */ NO_REGS, /* c */ NO_REGS, /* d */ NO_REGS,
114 /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
115 /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS,
116 /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
117 /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
118 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
119 /* y */ FPUL_REGS, /* z */ R0_REGS
122 static void split_branches PROTO ((rtx));
124 /* Print the operand address in x to the stream. */
/* NOTE(review): this function and print_operand below are fragmentary in
   this listing -- return types, braces and the switch case labels are
   elided.  The fprintf calls show the SH assembler addressing syntax:
   @Rn, @(disp,Rn), @(r0,Rn), @-Rn (pre-decrement), @Rn+ (post-increment).  */
127 print_operand_address (stream, x)
131 switch (GET_CODE (x))
134 fprintf (stream, "@%s", reg_names[REGNO (x)]);
139 rtx base = XEXP (x, 0);
140 rtx index = XEXP (x, 1);
142 switch (GET_CODE (index))
145 fprintf (stream, "@(%d,%s)", INTVAL (index),
146 reg_names[REGNO (base)]);
/* r0-indexed form: one of base/index is r0 (regno 0), so the MAX picks
   the non-r0 register for the @(r0,Rn) syntax.  */
150 fprintf (stream, "@(r0,%s)",
151 reg_names[MAX (REGNO (base), REGNO (index))]);
162 fprintf (stream, "@-%s", reg_names[REGNO (XEXP (x, 0))]);
166 fprintf (stream, "@%s+", reg_names[REGNO (XEXP (x, 0))]);
170 output_addr_const (stream, x);
175 /* Print operand x (an rtx) in assembler syntax to file stream
176 according to modifier code.
178 '.' print a .s if insn needs delay slot
179 ',' print LOCAL_LABEL_PREFIX
180 '@' print trap, rte or rts depending upon pragma interruptness
181 '#' output a nop if there is nothing to put in the delay slot
182 'O' print a constant without the #
183 'R' print the LSW of a dp value - changes if in little endian
184 'S' print the MSW of a dp value - changes if in little endian
185 'T' print the next word of a dp value - same as 'R' in big endian mode. */
188 print_operand (stream, x, code)
197 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
198 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
201 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
/* '@': choose the return instruction for the epilogue -- trapa #n when
   the trap_exit attribute is set, rte inside an interrupt handler,
   plain rts otherwise.  */
205 fprintf (stream, "trapa #%d", trap_exit);
206 else if (pragma_interrupt)
207 fprintf (stream, "rte");
209 fprintf (stream, "rts");
212 /* Output a nop if there's nothing in the delay slot. */
213 if (dbr_sequence_length () == 0)
214 fprintf (stream, "\n\tnop");
217 output_addr_const (stream, x);
/* 'R'/'S': LSW/MSW register of a double-word pair (see the MSW/LSW
   macros at the top of the file).  */
220 fputs (reg_names[REGNO (x) + LSW], (stream));
223 fputs (reg_names[REGNO (x) + MSW], (stream));
226 /* Next word of a double. */
227 switch (GET_CODE (x))
230 fputs (reg_names[REGNO (x) + 1], (stream));
233 print_operand_address (stream,
234 XEXP (adj_offsettable_operand (x, 4), 0));
239 switch (GET_CODE (x))
242 fputs (reg_names[REGNO (x)], (stream));
245 output_address (XEXP (x, 0));
249 output_addr_const (stream, x);
256 /* Emit code to perform a block move. Choose the best method.
258 OPERANDS[0] is the destination.
259 OPERANDS[1] is the source.
260 OPERANDS[2] is the size.
261 OPERANDS[3] is the alignment safe to use. */
/* NOTE(review): interior lines of this function are elided (return type,
   braces, the branch selecting the fixed-size __movstrSI helpers).
   Visible strategy: only constant, 4-byte-aligned, multiple-of-4 sizes
   are handled; small sizes call a size-specific __movstrSI<n> library
   routine, larger sizes call the looping __movstr routine.  */
264 expand_block_move (operands)
267 int align = INTVAL (operands[3]);
268 int constp = (GET_CODE (operands[2]) == CONST_INT);
269 int bytes = (constp ? INTVAL (operands[2]) : 0);
271 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
272 alignment, or if it isn't a multiple of 4 bytes, then fail. */
273 if (! constp || align < 4 || (bytes % 4 != 0))
/* Library helpers take the destination in r4 and source in r5 (SH
   argument registers).  */
281 rtx r4 = gen_rtx (REG, SImode, 4);
282 rtx r5 = gen_rtx (REG, SImode, 5);
284 sprintf (entry, "__movstrSI%d", bytes);
285 entry_name = get_identifier (entry);
288 = copy_to_mode_reg (Pmode,
289 gen_rtx (SYMBOL_REF, Pmode,
290 IDENTIFIER_POINTER (entry_name)));
291 emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
292 emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
293 emit_insn (gen_block_move_real (func_addr_rtx));
297 /* This is the same number of bytes as a memcpy call, but to a different
298 less common function name, so this will occasionally use more space. */
299 if (! TARGET_SMALLCODE)
303 int final_switch, while_loop;
304 rtx r4 = gen_rtx (REG, SImode, 4);
305 rtx r5 = gen_rtx (REG, SImode, 5);
306 rtx r6 = gen_rtx (REG, SImode, 6);
308 entry_name = get_identifier ("__movstr");
310 = copy_to_mode_reg (Pmode,
311 gen_rtx (SYMBOL_REF, Pmode,
312 IDENTIFIER_POINTER (entry_name)));
313 emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
314 emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
316 /* r6 controls the size of the move. 16 is decremented from it
317 for each 64 bytes moved. Then the negative bit left over is used
318 as an index into a list of move instructions. e.g., a 72 byte move
319 would be set up with size(r6) = 14, for one iteration through the
320 big while loop, and a switch of -2 for the last part. */
322 final_switch = 16 - ((bytes / 4) % 16);
323 while_loop = ((bytes / 4) / 16 - 1) * 16;
324 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
325 emit_insn (gen_block_lump_real (func_addr_rtx));
332 /* Prepare operands for a move define_expand; specifically, one of the
333 operands must be in a register. */
/* Only legalizes operands before reload; once reload has started the
   operands are taken as-is.  NOTE(review): return type and braces are
   elided in this listing.  */
336 prepare_move_operands (operands, mode)
338 enum machine_mode mode;
340 if (! reload_in_progress && ! reload_completed)
342 /* Copy the source to a register if both operands aren't registers. */
343 if (! register_operand (operands[0], mode)
344 && ! register_operand (operands[1], mode))
345 operands[1] = copy_to_mode_reg (mode, operands[1]);
347 /* This case can happen while generating code to move the result
348 of a library call to the target. Reject `st r0,@(rX,rY)' because
349 reload will fail to find a spill register for rX, since r0 is already
350 being used for the source. */
351 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
352 && GET_CODE (operands[0]) == MEM
353 && GET_CODE (XEXP (operands[0], 0)) == PLUS
354 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
355 operands[1] = copy_to_mode_reg (mode, operands[1]);
361 /* Prepare the operands for an scc instruction; make sure that the
362 compare has been done. */
/* Emits the compare that sets the T bit for condition CODE using the
   saved globals sh_compare_op0/sh_compare_op1, then (presumably --
   elided) returns the T register.  NOTE(review): many interior lines,
   including the operand-swap condition, are elided here.  */
364 prepare_scc_operands (code)
367 rtx t_reg = gen_rtx (REG, SImode, T_REG);
368 enum rtx_code oldcode = code;
369 enum machine_mode mode;
371 /* First need a compare insn. */
375 /* It isn't possible to handle this case. */
/* Swap the operands (the condition guarding this swap is elided;
   oldcode above suggests the comparison code was canonicalized).  */
392 rtx tmp = sh_compare_op0;
393 sh_compare_op0 = sh_compare_op1;
394 sh_compare_op1 = tmp;
397 mode = GET_MODE (sh_compare_op0);
398 if (mode == VOIDmode)
399 mode = GET_MODE (sh_compare_op1);
401 sh_compare_op0 = force_reg (mode, sh_compare_op0);
/* SH can compare against #0 directly only for EQ/NE; everything else,
   all unsigned compares, and all SH3E float compares need op1 in a
   register too.  */
402 if ((code != EQ && code != NE
403 && (sh_compare_op1 != const0_rtx
404 || code == GTU || code == GEU || code == LTU || code == LEU))
405 || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)
406 sh_compare_op1 = force_reg (mode, sh_compare_op1);
408 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
409 gen_rtx (code, SImode, sh_compare_op0,
415 /* Called from the md file, set up the operands of a compare instruction. */
/* NOTE(review): return type, braces and the emit of the final insn are
   elided in this listing.  Register 18 below is the T bit (matches the
   t_reg/T_REG usage in prepare_scc_operands).  */
418 from_compare (operands, code)
422 enum machine_mode mode = GET_MODE (sh_compare_op0);
424 if (mode == VOIDmode)
425 mode = GET_MODE (sh_compare_op1);
428 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
430 /* Force args into regs, since we can't use constants here. */
431 sh_compare_op0 = force_reg (mode, sh_compare_op0);
432 if (sh_compare_op1 != const0_rtx
433 || code == GTU || code == GEU
434 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
435 sh_compare_op1 = force_reg (mode, sh_compare_op1);
/* IEEE GE on SH3E floats is synthesized as GT plus a separate EQ
   compare (fcmp/eq), since the hardware lacks a direct >= compare.  */
437 if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
439 from_compare (operands, GT);
440 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
443 insn = gen_rtx (SET, VOIDmode,
444 gen_rtx (REG, SImode, 18),
445 gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1));
449 /* Functions to output assembly code. */
451 /* Return a sequence of instructions to perform DI or DF move.
453 Since the SH cannot move a DI or DF in one instruction, we have
454 to take care when we see overlapping source and dest registers. */
/* Returns a two-instruction template string; %1/%0 are the operands and
   %T1/%T0 their second (next) words, %S/%R the MSW/LSW (see
   print_operand).  NOTE(review): return type, braces and several
   branches (including the abort paths) are elided in this listing.  */
457 output_movedouble (insn, operands, mode)
460 enum machine_mode mode;
462 rtx dst = operands[0];
463 rtx src = operands[1];
/* Pre-decrement store: push high word first so the pair ends up in
   memory order.  */
465 if (GET_CODE (dst) == MEM
466 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
467 return "mov.l %T1,%0\n\tmov.l %1,%0";
469 if (register_operand (dst, mode)
470 && register_operand (src, mode))
472 if (REGNO (src) == MACH_REG)
473 return "sts mach,%S0\n\tsts macl,%R0";
475 /* When mov.d r1,r2 do r2->r3 then r1->r2;
476 when mov.d r1,r0 do r1->r0 then r2->r1. */
/* Order the two register moves so the overlapping register is read
   before it is overwritten.  */
478 if (REGNO (src) + 1 == REGNO (dst))
479 return "mov %T1,%T0\n\tmov %1,%0";
481 return "mov %1,%0\n\tmov %T1,%T0";
483 else if (GET_CODE (src) == CONST_INT)
/* Constant fits in the low word; high word is just the sign extension.  */
485 if (INTVAL (src) < 0)
486 output_asm_insn ("mov #-1,%S0", operands);
488 output_asm_insn ("mov #0,%S0", operands);
492 else if (GET_CODE (src) == MEM)
495 int dreg = REGNO (dst);
496 rtx inside = XEXP (src, 0);
/* Find the pointer register so we can decide which half to load
   first without clobbering the address (compared against dreg in an
   elided line, presumably).  */
498 if (GET_CODE (inside) == REG)
499 ptrreg = REGNO (inside);
500 else if (GET_CODE (inside) == SUBREG)
501 ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside);
502 else if (GET_CODE (inside) == PLUS)
504 ptrreg = REGNO (XEXP (inside, 0));
505 /* ??? A r0+REG address shouldn't be possible here, because it isn't
506 an offsettable address. Unfortunately, offsettable addresses use
507 QImode to check the offset, and a QImode offsettable address
508 requires r0 for the other operand, which is not currently
509 supported, so we can't use the 'o' constraint.
510 Thus we must check for and handle r0+REG addresses here.
511 We punt for now, since this is likely very rare. */
512 if (GET_CODE (XEXP (inside, 1)) == REG)
515 else if (GET_CODE (inside) == LABEL_REF)
516 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
517 else if (GET_CODE (inside) == POST_INC)
518 return "mov.l %1,%0\n\tmov.l %1,%T0";
522 /* Work out the safe way to copy. Copy into the second half first. */
524 return "mov.l %T1,%T0\n\tmov.l %1,%0";
527 return "mov.l %1,%0\n\tmov.l %T1,%T0";
530 /* Print an instruction which would have gone into a delay slot after
531 another instruction, but couldn't because the other instruction expanded
532 into a sequence where putting the slot insn at the end wouldn't work. */
/* NOTE(review): the print_slot function header is elided; only its body
   (final_scan_insn + marking the slot insn deleted so it is not output
   twice) is visible.  */
538 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
540 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
/* Emit an out-of-range ("far") jump: load the target address (or a
   16-bit displacement) into a scratch register and branch through it.
   NOTE(review): return type, braces, the 'far' flag computation and
   the constant-pool epilogue around it are partially elided.  */
544 output_far_jump (insn, op)
548 struct { rtx lab, reg, op; } this;
551 int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)];
553 this.lab = gen_label_rtx ();
/* Within ~32K: a mov.w-loaded displacement plus braf suffices;
   otherwise a full 32-bit address with jmp.  */
555 if (offset >= -32764 && offset - get_attr_length (insn) <= 32766)
558 jump = "mov.w %O0,%1;braf %1";
563 jump = "mov.l %O0,%1;jmp @%1";
565 /* If we have a scratch register available, use it. */
/* An indirect_jump_scratch insn placed just before us carries a free
   register (cf. code_for_indirect_jump_scratch at the top of file).  */
566 if (GET_CODE (PREV_INSN (insn)) == INSN
567 && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
569 this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
570 output_asm_insn (jump, &this.lab);
571 if (dbr_sequence_length ())
572 print_slot (final_sequence);
574 output_asm_insn ("nop", 0);
578 /* Output the delay slot insn first if any. */
579 if (dbr_sequence_length ())
580 print_slot (final_sequence);
/* No scratch available: spill r13 around the jump.  */
582 this.reg = gen_rtx (REG, SImode, 13);
583 output_asm_insn ("mov.l r13,@-r15", 0);
584 output_asm_insn (jump, &this.lab);
585 output_asm_insn ("mov.l @r15+,r13", 0);
/* Emit the aligned literal holding the target address/displacement.  */
588 output_asm_insn (".align 2", 0);
589 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
591 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
595 /* Local label counter, used for constants in the pool and inside
600 /* Output code for ordinary branches. */
/* Emits a conditional branch; LOGIC selects bt vs bf polarity.  Long
   branches are inverted around an unconditional bra.  NOTE(review):
   return type, braces, case labels of the length switch, and the
   lf counter declaration are elided in this listing.  */
603 output_branch (logic, insn, operands)
608 switch (get_attr_length (insn))
611 /* This can happen if filling the delay slot has caused a forward
612 branch to exceed its range (we could reverse it, but only
613 when we know we won't overextend other branches; this should
614 best be handled by relaxation).
615 It can also happen when other condbranches hoist delay slot insn
616 from their destination, thus leading to code size increase.
617 But the branch will still be in the range -4092..+4098 bytes. */
622 /* The call to print_slot will clobber the operands. */
623 rtx op0 = operands[0];
625 /* If the instruction in the delay slot is annulled (true), then
626 there is no delay slot where we can put it now. The only safe
627 place for it is after the label. final will do that by default. */
630 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
/* Inverted short branch over the real jump; note logic is negated
   ("f" when LOGIC) because we branch around the bra.  */
632 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
633 ASSEMBLER_DIALECT ? "/" : ".", label);
634 print_slot (final_sequence);
637 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
639 output_asm_insn ("bra\t%l0", &op0);
640 fprintf (asm_out_file, "\tnop\n");
641 ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
645 /* When relaxing, handle this like a short branch. The linker
646 will fix it up if it still doesn't fit after relaxation. */
648 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
/* Emit TEMPLATE for insns that branch; reuses a following conditional
   jump's label in operands[9] when possible, else creates one.
   NOTE(review): heavily elided -- the returned template strings and
   several branches are not visible.  */
655 output_branchy_insn (code, template, insn, operands)
661 rtx next_insn = NEXT_INSN (insn);
664 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
666 rtx src = SET_SRC (PATTERN (next_insn));
667 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
669 /* Following branch not taken */
670 operands[9] = gen_label_rtx ();
671 emit_label_after (operands[9], next_insn);
676 int offset = (branch_dest (next_insn)
677 - insn_addresses[INSN_UID (next_insn)] + 4);
678 if (offset >= -252 && offset <= 258)
680 if (GET_CODE (src) == IF_THEN_ELSE)
688 operands[9] = gen_label_rtx ();
689 emit_label_after (operands[9], insn);
/* IEEE float equality compare helper built on output_branchy_insn.  */
694 output_ieee_ccmpeq (insn, operands)
697 output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
700 /* Output to FILE the start of the assembler file. */
/* NOTE(review): return type, braces and the data_section switch the
   comment below describes are elided in this listing.  */
703 output_file_start (file)
708 output_file_directive (file, main_input_filename);
710 /* Switch to the data section so that the coffsem symbol and the
711 gcc2_compiled. symbol aren't in the text section. */
/* Tell the assembler the target byte order.  */
714 if (TARGET_LITTLE_ENDIAN)
715 fprintf (file, "\t.little\n");
718 /* Actual number of instructions used to make a shift by N. */
/* Indexed by shift count 0..31; 8 marks counts where the inline
   sequence is not used (an out-of-line helper is cheaper -- cf.
   expand_ashiftrt below).  */
719 static char ashiftrt_insns[] =
720 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
722 /* Left shift and logical right shift are the same. */
723 static char shift_insns[] =
724 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
726 /* Individual shift amounts needed to get the above length sequences.
727 One bit right shifts clobber the T bit, so when possible, put one bit
728 shifts in the middle of the sequence, so the ends are eligible for
729 branch delay slots. */
/* Each row lists the component shifts (SH only has shifts by 1, 2, 8,
   16) whose sum is the row index; negative entries appear to denote a
   shift in the opposite direction to correct an overshoot -- see the
   "Negative values here come from the shift_amounts array" note in
   gen_ashift.  Row n contains shift_insns[n] nonzero entries.  */
730 static short shift_amounts[32][5] = {
731 {0}, {1}, {2}, {2, 1},
732 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
733 {8}, {8, 1}, {8, 2}, {8, 1, 2},
734 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
735 {16}, {16, 1}, {16, 2}, {16, 1, 2},
736 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
737 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
738 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
740 /* Likewise, but for shift amounts < 16, up to three highmost bits
741 might be clobbered. This is typically used when combined with some
742 kind of sign or zero extension. */
744 static char ext_shift_insns[] =
745 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
747 static short ext_shift_amounts[32][4] = {
748 {0}, {1}, {2}, {2, 1},
749 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
750 {8}, {8, 1}, {8, 2}, {8, 1, 2},
751 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
752 {16}, {16, 1}, {16, 2}, {16, 1, 2},
753 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
754 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
755 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
757 /* Assuming we have a value that has been sign-extended by at least one bit,
758 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
759 to shift it by N without data loss, and quicker than by other means? */
/* True exactly for n in {8..15}: (n | 8) == 15 <=> 7 <= n <= 15 with
   bit 3 set, i.e. n in 8..15 (and n == 7 fails since 7|8 == 15 -- no,
   7|8 is 15; so the predicate is true for n in {7, 15} union {8..14}?
   NOTE(review): verify -- (n|8)==15 holds precisely for n in 7..15
   with bits above 3 clear and low bits 111 after OR, i.e. n in
   {7,15,11,13,...}; confirm intended domain against uses of this macro.  */
760 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
762 /* This is used in length attributes in sh.md to help compute the length
763 of arbitrary constant shift instructions. */
/* Returns the table entry for the insn's shift kind and count.
   NOTE(review): return type, braces and the switch dispatching on
   shift_code are elided in this listing.  */
766 shift_insns_rtx (insn)
769 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
770 int shift_count = INTVAL (XEXP (set_src, 1));
771 enum rtx_code shift_code = GET_CODE (set_src);
776 return ashiftrt_insns[shift_count];
779 return shift_insns[shift_count];
785 /* Return the cost of a shift. */
/* NOTE(review): this is shiftcosts -- its header line is elided.  */
791 int value = INTVAL (XEXP (x, 1));
793 /* If shift by a non constant, then this will be expensive. */
794 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
795 return SH_DYNAMIC_SHIFT_COST;
797 /* Otherwise, return the true cost in instructions. */
798 if (GET_CODE (x) == ASHIFTRT)
800 int cost = ashiftrt_insns[value];
801 /* If SH3, then we put the constant in a reg and use shad. */
802 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
803 cost = 1 + SH_DYNAMIC_SHIFT_COST;
807 return shift_insns[value];
810 /* Return the cost of an AND operation. */
/* NOTE(review): this is andcosts -- header and the literal return
   values of each branch are elided; only the classification of the
   constant operand is visible.  */
818 /* Anding with a register is a single cycle and instruction. */
819 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
822 i = INTVAL (XEXP (x, 1));
823 /* These constants are single cycle extu.[bw] instructions. */
824 if (i == 0xff || i == 0xffff)
826 /* Constants that can be used in an and immediate instruction is a single
827 cycle, but this requires r0, so make it a little more expensive. */
828 if (CONST_OK_FOR_L (i))
830 /* Constants that can be loaded with a mov immediate and an and.
831 This case is probably unnecessary. */
832 if (CONST_OK_FOR_I (i))
834 /* Any other constants requires a 2 cycle pc-relative load plus an and.
835 This case is probably unnecessary. */
839 /* Return the cost of a multiply. */
/* NOTE(review): this is multcosts -- header, the TARGET_SH2 test and
   all return values are elided from this listing.  */
846 /* We have a mul insn, so we can never take more than the mul and the
847 read of the mac reg, but count more because of the latency and extra
849 if (TARGET_SMALLCODE)
854 /* If we're aiming at small code, then just count the number of
855 insns in a multiply call sequence. */
856 if (TARGET_SMALLCODE)
859 /* Otherwise count all the insns in the routine we'd be calling too. */
863 /* Code to expand a shift. */
/* Emit one component shift of N bits of kind TYPE (ASHIFT/LSHIFTRT/
   ASHIFTRT) on REG in place.  NOTE(review): the switch structure and
   the handling that turns negative N into the opposite-direction shift
   are elided; only the emit calls for each named pattern are visible.  */
866 gen_ashift (type, n, reg)
871 /* Negative values here come from the shift_amounts array. */
884 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
888 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
890 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
893 emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n)));
898 /* Same for HImode */
901 gen_ashift_hi (type, n, reg)
906 /* Negative values here come from the shift_amounts array. */
920 /* We don't have HImode right shift operations because using the
921 ordinary 32 bit shift instructions for that doesn't generate proper
923 gen_ashift_hi is only called in contexts where we know that the
924 sign extension works out correctly. */
/* Right shifts are delegated to the SImode shifter on the containing
   SUBREG word.  */
927 if (GET_CODE (reg) == SUBREG)
929 word = SUBREG_WORD (reg);
930 reg = SUBREG_REG (reg);
932 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word));
936 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
941 /* Output RTL to split a constant shift into its component SH constant
942 shift instructions. */
945 gen_shifty_op (code, operands)
949 int value = INTVAL (operands[2]);
952 /* Truncate the shift count in case it is out of bounds. */
953 value = value & 0x1f;
/* Shift-by-31 special cases (guard condition elided): a logical right
   shift by 31 is rotate-left + movt; a left shift by 31 into r0 can
   use and #1 + the 31-bit rotate.  */
957 if (code == LSHIFTRT)
959 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
960 emit_insn (gen_movt (operands[0]));
963 else if (code == ASHIFT)
965 /* There is a two instruction sequence for 31 bit left shifts,
966 but it requires r0. */
967 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
969 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx))
970 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
977 /* This can happen when not optimizing. We must output something here
978 to prevent the compiler from aborting in final.c after the try_split
980 emit_insn (gen_nop ());
/* General case: walk the precomputed component-shift table.  */
984 max = shift_insns[value];
985 for (i = 0; i < max; i++)
986 gen_ashift (code, shift_amounts[value][i], operands[0]);
989 /* Same as above, but optimized for values where the topmost bits don't
993 gen_shifty_hi_op (code, operands)
997 int value = INTVAL (operands[2]);
1001 /* This operation is used by and_shl for SImode values with a few
1002 high bits known to be cleared. */
1006 emit_insn (gen_nop ());
1010 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1013 max = ext_shift_insns[value];
1014 for (i = 0; i < max; i++)
1015 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1018 /* When shifting right, emit the shifts in reverse order, so that
1019 solitary negative values come first. */
1020 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1021 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1024 /* Output RTL for an arithmetic right shift. */
1026 /* ??? Rewrite to use super-optimizer sequences. */
/* Chooses among: dynamic shad (SH3), special 31/16..19-count
   sequences, a short inline 1-bit-shift loop (count <= 5), or a call
   to the __ashiftrt_r4_<n> library helper.  NOTE(review): return type,
   braces, the TARGET_SH3 guard, and the surrounding loop bounds are
   elided in this listing.  */
1029 expand_ashiftrt (operands)
/* SH3 dynamic path: shad shifts right by a negated count.  */
1039 if (GET_CODE (operands[2]) != CONST_INT)
1041 rtx count = copy_to_mode_reg (SImode, operands[2]);
1042 emit_insn (gen_negsi2 (count, count));
1043 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1046 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1047 > 1 + SH_DYNAMIC_SHIFT_COST)
1050 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1051 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1055 if (GET_CODE (operands[2]) != CONST_INT)
1058 value = INTVAL (operands[2]) & 31;
/* value == 31: dedicated sign-fill pattern.  */
1062 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1065 else if (value >= 16 && value <= 19)
1067 wrk = gen_reg_rtx (SImode);
1068 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1071 gen_ashift (ASHIFTRT, 1, wrk);
1072 emit_move_insn (operands[0], wrk);
1075 /* Expand a short sequence inline, longer call a magic routine. */
1076 else if (value <= 5)
1078 wrk = gen_reg_rtx (SImode);
1079 emit_move_insn (wrk, operands[1]);
1081 gen_ashift (ASHIFTRT, 1, wrk);
1082 emit_move_insn (operands[0], wrk);
1086 wrk = gen_reg_rtx (Pmode);
1088 /* Load the value into an arg reg and call a helper. */
1089 emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]);
1090 sprintf (func, "__ashiftrt_r4_%d", value);
1091 func_name = get_identifier (func);
1092 emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode,
1093 IDENTIFIER_POINTER (func_name)));
1094 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1095 emit_move_insn (operands[0], gen_rtx (REG, SImode, 4));
/* True when a register shift (shad/shld) beats the inline constant
   sequence; used by sh.md to force the count into a register.  */
1099 int sh_dynamicalize_shift_p (count)
1102 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1105 /* Try to find a good way to implement the combiner pattern
1106 [(set (match_operand:SI 0 "register_operand" "r")
1107 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1108 (match_operand:SI 2 "const_int_operand" "n"))
1109 (match_operand:SI 3 "const_int_operand" "n")))
1110 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1111 return 0 for simple right / left or left/right shift combination.
1112 return 1 for a combination of shifts with zero_extend.
1113 return 2 for a combination of shifts with an AND that needs r0.
1114 return 3 for a combination of shifts with an AND that needs an extra
1115 scratch register, when the three highmost bits of the AND mask are clear.
1116 return 4 for a combination of shifts with an AND that needs an extra
1117 scratch register, when any of the three highmost bits of the AND mask
1119 If ATTRP is set, store an initial right shift width in ATTRP[0],
1120 and the instruction length in ATTRP[1] . These values are not valid
1122 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1123 shift_amounts for the last shift value that is to be used before the
/* NOTE(review): interior lines (the best_kind bookkeeping, several
   cost-update bodies and the final return) are elided from this
   listing; comments below annotate only what is visible.  */
1126 shl_and_kind (left_rtx, mask_rtx, attrp)
1127 rtx left_rtx, mask_rtx;
1130 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1131 int left = INTVAL (left_rtx), right;
1133 int cost, best_cost = 10000;
1134 int best_right = 0, best_len = 0;
1138 if (left < 0 || left > 31)
/* Work on the mask as seen before the left shift.  */
1140 if (GET_CODE (mask_rtx) == CONST_INT)
1141 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1143 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1144 /* Can this be expressed as a right shift / left shift pair ? */
/* lsb isolates the lowest set bit of mask; mask2 the bits above the
   contiguous low run; lsb2 the lowest set bit of mask2.  */
1145 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1146 right = exact_log2 (lsb);
1147 mask2 = ~(mask + lsb - 1);
1148 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
1149 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1151 best_cost = shift_insns[right] + shift_insns[right + left];
1152 /* mask has no trailing zeroes <==> ! right */
1153 else if (! right && mask2 == ~(lsb2 - 1))
1155 int late_right = exact_log2 (lsb2);
1156 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1158 /* Try to use zero extend */
1159 if (mask2 == ~(lsb2 - 1))
1163 for (width = 8; width <= 16; width += 8)
1165 /* Can we zero-extend right away? */
1166 if (lsb2 == (HOST_WIDE_INT)1 << width)
1169 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1170 if (cost < best_cost)
1181 /* ??? Could try to put zero extend into initial right shift,
1182 or even shift a bit left before the right shift. */
1183 /* Determine value of first part of left shift, to get to the
1184 zero extend cut-off point. */
1185 first = width - exact_log2 (lsb2) + right;
1186 if (first >= 0 && right + left - first >= 0)
1188 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
1189 + ext_shift_insns[right + left - first];
1190 if (cost < best_cost)
1202 /* Try to use r0 AND pattern */
1203 for (i = 0; i <= 2; i++)
1207 if (! CONST_OK_FOR_L (mask >> i))
1209 cost = (i != 0) + 2 + ext_shift_insns[left + i];
1210 if (cost < best_cost)
1215 best_len = cost - 1;
1218 /* Try to use a scratch register to hold the AND operand. */
1219 can_ext = ((mask << left) & 0xe0000000) == 0;
1220 for (i = 0; i <= 2; i++)
1224 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
1225 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
1226 if (cost < best_cost)
1231 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
1237 attrp[0] = best_right;
1238 attrp[1] = best_len;
1243 /* This is used in length attributes of the unnamed instructions
1244 corresponding to shl_and_kind return values of 1 and 2. */
/* Pulls the cached instruction length (ATTRP[1]) out of shl_and_kind
   for the insn's shift/mask operands.  */
1246 shl_and_length (insn)
1249 rtx set_src, left_rtx, mask_rtx;
1252 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1253 left_rtx = XEXP (XEXP (set_src, 0), 1);
1254 mask_rtx = XEXP (set_src, 1);
1255 shl_and_kind (left_rtx, mask_rtx, attributes);
1256 return attributes[1];
1259 /* This is used in length attribute of the and_shl_scratch instruction. */
/* Sums the three component shift lengths plus one for the AND.  */
1262 shl_and_scr_length (insn)
1265 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1266 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
1267 rtx op = XEXP (set_src, 0);
1268 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
1269 op = XEXP (XEXP (op, 0), 0);
1270 return len + shift_insns[INTVAL (XEXP (op, 1))];
1273 /* Generating rtl? */
1274 extern int rtx_equal_function_value_matters;
1276 /* Generate rtl for instructions for which shl_and_kind advised a particular
1277 method of generating them, i.e. returned zero. */
/* Expands DEST = (SOURCE << LEFT) & MASK using the strategy chosen by
   shl_and_kind.  NOTE(review): the switch on `kind`, its case labels
   and several closing braces are elided from this listing; comments
   below mark where the visible fragments belong.  */
1280 gen_shl_and (dest, left_rtx, mask_rtx, source)
1281 rtx dest, left_rtx, mask_rtx, source;
1284 unsigned HOST_WIDE_INT mask;
1285 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
1286 int right, total_shift;
1287 int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op;
1289 right = attributes[0];
1290 total_shift = INTVAL (left_rtx) + right;
1291 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
/* kind 1 (zero-extend strategy): attributes[2] is the split point of
   the left shift (see shl_and_kind's comment).  */
1298 int first = attributes[2];
/* Zero-extend immediately when the whole masked value fits a byte
   or a word.  */
1303 emit_insn ((mask << right) <= 0xff
1304 ? gen_zero_extendqisi2(dest,
1305 gen_lowpart (QImode, source))
1306 : gen_zero_extendhisi2(dest,
1307 gen_lowpart (HImode, source)));
1311 emit_insn (gen_movsi (dest, source));
1315 operands[2] = GEN_INT (right);
1316 gen_shifty_hi_op (LSHIFTRT, operands);
1320 operands[2] = GEN_INT (first);
1321 gen_shifty_hi_op (ASHIFT, operands);
1322 total_shift -= first;
1326 emit_insn (mask <= 0xff
1327 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
1328 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
1329 if (total_shift > 0)
1331 operands[2] = GEN_INT (total_shift);
1332 gen_shifty_hi_op (ASHIFT, operands);
1337 shift_gen_fun = gen_shifty_op;
1339 /* If the topmost bit that matters is set, set the topmost bits
1340 that don't matter. This way, we might be able to get a shorter
/* NOTE(review): `31 - total_shift` relies on `<<` binding looser than
   `-`, i.e. 1 << (31 - total_shift); later GCC added explicit parens.  */
1342 if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift))
1343 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
1345 /* Don't expand fine-grained when combining, because that will
1346 make the pattern fail. */
1347 if (rtx_equal_function_value_matters
1348 || reload_in_progress || reload_completed)
1352 /* Cases 3 and 4 should be handled by this split
1353 only while combining */
1358 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
1361 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
1366 operands[2] = GEN_INT (total_shift);
1367 shift_gen_fun (ASHIFT, operands);
/* Scratch-register strategy: compute the negative correction from the
   ext_shift_amounts row for the combined shift.  */
1374 if (kind != 4 && total_shift < 16)
1376 neg = -ext_shift_amounts[total_shift][1];
1378 neg -= ext_shift_amounts[total_shift][2];
1382 emit_insn (gen_and_shl_scratch (dest, source,
1385 GEN_INT (total_shift + neg),
1387 emit_insn (gen_movsi (dest, dest));
1394 /* Try to find a good way to implement the combiner pattern
1395 [(set (match_operand:SI 0 "register_operand" "=r")
1396 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1397 (match_operand:SI 2 "const_int_operand" "n")
1398 (match_operand:SI 3 "const_int_operand" "n")
1400 (clobber (reg:SI 18))]
1401 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1402 return 0 for simple left / right shift combination.
1403 return 1 for left shift / 8 bit sign extend / left shift.
1404 return 2 for left shift / 16 bit sign extend / left shift.
1405 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1406 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1407 return 5 for left shift / 16 bit sign extend / right shift
1408 return 6 for < 8 bit sign extend / left shift.
1409 return 7 for < 8 bit sign extend / left shift / single right shift.
1410 If COSTP is nonzero, assign the calculated cost to *COSTP. */
/* Choose the cheapest strategy (1..8, see comment above) for the
   shift-left / sign-extract combiner pattern, comparing instruction-count
   costs from the shift_insns / ashiftrt_insns / ext_shift_insns tables.
   If COSTP is nonzero, the chosen cost is stored through it.
   NOTE(review): this extraction is missing lines (original numbering is
   non-contiguous); kind assignments and returns are partly elided.  */
1413 shl_sext_kind (left_rtx, size_rtx, costp)
1414 rtx left_rtx, size_rtx;
1417 int left, size, insize, ext;
1418 int cost, best_cost;
/* left = shift count, size = field size, insize = significant input bits.  */
1421 left = INTVAL (left_rtx);
1422 size = INTVAL (size_rtx);
1423 insize = size - left;
1426 /* Default to left / right shift. */
1428 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1431 /* 16 bit shift / sign extend / 16 bit shift */
1432 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1433 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1434 below, by alternative 3 or something even better. */
1435 if (cost < best_cost)
1441 /* Try a plain sign extend between two shifts. */
1442 for (ext = 16; ext >= insize; ext -= 8)
1446 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1447 if (cost < best_cost)
1453 /* Check if we can do a sloppy shift with a final signed shift
1454 restoring the sign. */
1455 if (EXT_SHIFT_SIGNED (size - ext))
1456 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1457 /* If not, maybe it's still cheaper to do the second shift sloppy,
1458 and do a final sign extend? */
1459 else if (size <= 16)
1460 cost = ext_shift_insns[ext - insize] + 1
1461 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1464 if (cost < best_cost)
/* ext is 8 or 16 here, so this selects kind 3 or 4.  */
1466 kind = ext / 8U + 2;
1470 /* Check if we can sign extend in r0 */
1473 cost = 3 + shift_insns[left];
1474 if (cost < best_cost)
1479 /* Try the same with a final signed shift. */
1482 cost = 3 + ext_shift_insns[left + 1] + 1;
1483 if (cost < best_cost)
1492 /* Try to use a dynamic shift. */
1493 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
1494 if (cost < best_cost)
1505 /* Function to be used in the length attribute of the instructions
1506 implementing this pattern. */
/* Extracts the shift and size operands from INSN's pattern and asks
   shl_sext_kind for the cost, which is the insn count / length metric.
   NOTE(review): the return statement is elided in this extraction;
   presumably it returns a value derived from `cost` — confirm against
   the full source.  */
1509 shl_sext_length (insn)
1512 rtx set_src, left_rtx, size_rtx;
1515 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1516 left_rtx = XEXP (XEXP (set_src, 0), 1);
1517 size_rtx = XEXP (set_src, 1);
1518 shl_sext_kind (left_rtx, size_rtx, &cost);
1522 /* Generate rtl for this pattern */
/* Emit RTL for the shift-left / sign-extract pattern, dispatching on the
   strategy kind returned by shl_sext_kind (see the kind list above
   shl_sext_kind).
   NOTE(review): this extraction is missing lines (original numbering is
   non-contiguous); the switch/case structure around these statements is
   partly elided.  */
1525 gen_shl_sext (dest, left_rtx, size_rtx, source)
1526 rtx dest, left_rtx, size_rtx, source;
1529 int left, size, insize, cost;
1532 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
1533 left = INTVAL (left_rtx);
1534 size = INTVAL (size_rtx);
1535 insize = size - left;
/* Kinds 1-4: shift up to an 8- or 16-bit boundary, sign extend there,
   then shift into final position.  */
1543 int ext = kind & 1 ? 8 : 16;
1544 int shift2 = size - ext;
1546 /* Don't expand fine-grained when combining, because that will
1547 make the pattern fail. */
1548 if (! rtx_equal_function_value_matters
1549 && ! reload_in_progress && ! reload_completed)
1551 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1552 emit_insn (gen_movsi (dest, source));
1556 emit_insn (gen_movsi (dest, source));
1560 operands[2] = GEN_INT (ext - insize);
1561 gen_shifty_hi_op (ASHIFT, operands);
1564 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
1565 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
1570 operands[2] = GEN_INT (shift2);
1571 gen_shifty_op (ASHIFT, operands);
/* A sloppy shift followed by one arithmetic right shift restores the
   sign when EXT_SHIFT_SIGNED allows it.  */
1578 if (EXT_SHIFT_SIGNED (shift2))
1580 operands[2] = GEN_INT (shift2 + 1);
1581 gen_shifty_op (ASHIFT, operands);
1582 operands[2] = GEN_INT (1);
1583 gen_shifty_op (ASHIFTRT, operands);
1586 operands[2] = GEN_INT (shift2);
1587 gen_shifty_hi_op (ASHIFT, operands);
1591 operands[2] = GEN_INT (-shift2);
1592 gen_shifty_hi_op (LSHIFTRT, operands);
1594 emit_insn (size <= 8
1595 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
1596 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1603 emit_insn (gen_shl_sext_ext (dest, source, GEN_INT (16 - insize),
1605 /* Don't use gen_ashrsi3 because it generates new pseudos. */
1607 gen_ashift (ASHIFTRT, 1, dest);
1612 /* Don't expand fine-grained when combining, because that will
1613 make the pattern fail. */
1614 if (! rtx_equal_function_value_matters
1615 && ! reload_in_progress && ! reload_completed)
1617 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1618 emit_insn (gen_movsi (dest, source));
/* Kinds 6/7: sign extend a < 8 bit field via mask / xor / add, then
   shift left (and for kind 7, one final arithmetic right shift).  */
1621 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
1622 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
1623 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
1625 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
1626 gen_shifty_op (ASHIFT, operands);
1628 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
1636 /* The SH cannot load a large constant into a register, constants have to
1637 come from a pc relative load. The reference of a pc relative load
1638 instruction must be less than 1k infront of the instruction. This
1639 means that we often have to dump a constant inside a function, and
1640 generate code to branch around it.
1642 It is important to minimize this, since the branches will slow things
1643 down and make things bigger.
1645 Worst case code looks like:
1663 We fix this by performing a scan before scheduling, which notices which
1664 instructions need to have their operands fetched from the constant table
1665 and builds the table.
1669 scan, find an instruction which needs a pcrel move. Look forward, find the
1670 last barrier which is within MAX_COUNT bytes of the requirement.
1671 If there isn't one, make one. Process all the instructions between
1672 the find and the barrier.
1674 In the above example, we can tell that L3 is within 1k of L1, so
1675 the first move can be shrunk from the 3 insn+constant sequence into
1676 just 1 insn, and the constant moved to L3 to make:
1687 Then the second move becomes the target for the shortening process. */
1691 rtx value; /* Value in table. */
1692 rtx label; /* Label of value. */
1693 enum machine_mode mode; /* Mode of value. */
1696 /* The maximum number of constants that can fit into one pool, since
1697 the pc relative range is 0...1020 bytes and constants are at least 4
1700 #define MAX_POOL_SIZE (1020/4)
1701 static pool_node pool_vector[MAX_POOL_SIZE];
1702 static int pool_size;
1704 /* ??? If we need a constant in HImode which is the truncated value of a
1705 constant we need in SImode, we could combine the two entries thus saving
1706 two bytes. Is this common enough to be worth the effort of implementing
1709 /* ??? This stuff should be done at the same time that we shorten branches.
1710 As it is now, we must assume that all branches are the maximum size, and
1711 this causes us to almost always output constant pools sooner than
1714 /* Add a constant to the pool and return its label. */
/* Linear-searches pool_vector for an existing entry equal to X in MODE;
   on a miss, appends a new entry with a fresh label.
   NOTE(review): lines are elided here (original numbering is
   non-contiguous); the pool_size increment and return of LAB are not
   visible in this extraction.  */
1717 add_constant (x, mode)
1719 enum machine_mode mode;
1724 /* First see if we've already got it. */
1725 for (i = 0; i < pool_size; i++)
1727 if (x->code == pool_vector[i].value->code
1728 && mode == pool_vector[i].mode)
/* Labels compare by their label number (XINT (x, 3)), not rtx_equal_p.  */
1730 if (x->code == CODE_LABEL)
1732 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
1735 if (rtx_equal_p (x, pool_vector[i].value))
1736 return pool_vector[i].label;
1740 /* Need a new one. */
1741 pool_vector[pool_size].value = x;
1742 lab = gen_label_rtx ();
1743 pool_vector[pool_size].mode = mode;
1744 pool_vector[pool_size].label = lab;
1749 /* Output the literal table. */
/* Emits the accumulated pool_vector after SCAN: first pass dumps the
   HImode (2-byte) constants, second pass the 4- and 8-byte ones, each
   with appropriate alignment, then a consttable_end marker and a barrier.
   NOTE(review): the function header and loop/brace structure are partly
   elided in this extraction (original numbering is non-contiguous).  */
1758 /* Do two passes, first time dump out the HI sized constants. */
1760 for (i = 0; i < pool_size; i++)
1762 pool_node *p = &pool_vector[i];
1764 if (p->mode == HImode)
1768 scan = emit_insn_after (gen_align_2 (), scan);
1771 scan = emit_label_after (p->label, scan);
1772 scan = emit_insn_after (gen_consttable_2 (p->value), scan);
/* Second pass: the remaining (SImode/DImode-sized) constants.  */
1778 for (i = 0; i < pool_size; i++)
1780 pool_node *p = &pool_vector[i];
1791 scan = emit_label_after (gen_label_rtx (), scan);
1792 scan = emit_insn_after (gen_align_4 (), scan);
1795 scan = emit_label_after (p->label, scan);
1796 scan = emit_insn_after (gen_consttable_4 (p->value), scan);
1803 scan = emit_label_after (gen_label_rtx (), scan);
1804 scan = emit_insn_after (gen_align_4 (), scan);
1807 scan = emit_label_after (p->label, scan);
1808 scan = emit_insn_after (gen_consttable_8 (p->value), scan);
1816 scan = emit_insn_after (gen_consttable_end (), scan);
1817 scan = emit_barrier_after (scan);
1821 /* Return non-zero if constant would be an ok source for a
1822 mov.w instead of a mov.l. */
/* True iff SRC is a CONST_INT in the signed 16-bit range [-32768, 32767].
   NOTE(review): the function header line is elided in this extraction.  */
1828 return (GET_CODE (src) == CONST_INT
1829 && INTVAL (src) >= -32768
1830 && INTVAL (src) <= 32767);
1833 /* Non-zero if the insn is a move instruction which needs to be fixed. */
1835 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
1836 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
1837 need to fix it if the input value is CONST_OK_FOR_I. */
/* A "broken" move is a SET of a constant that cannot be encoded as an
   immediate and so must be loaded from the constant pool.
   NOTE(review): the function header and parts of the condition are
   elided in this extraction (original numbering is non-contiguous) —
   the mixed &&/negation structure below cannot be fully judged here.  */
1843 if (GET_CODE (insn) == INSN)
1845 rtx pat = PATTERN (insn);
1846 if (GET_CODE (pat) == PARALLEL)
1847 pat = XVECEXP (pat, 0, 0);
1848 if (GET_CODE (pat) == SET
1849 /* We can load any 8 bit value if we don't care what the high
1850 order bits end up as. */
1851 && GET_MODE (SET_DEST (pat)) != QImode
1852 && CONSTANT_P (SET_SRC (pat))
/* FP 0.0 / 1.0 moved into an FP register has a dedicated pattern and
   needs no pool entry.  */
1854 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
1855 && (fp_zero_operand (SET_SRC (pat))
1856 || fp_one_operand (SET_SRC (pat)))
1857 && GET_CODE (SET_DEST (pat)) == REG
1858 && REGNO (SET_DEST (pat)) >= FIRST_FP_REG
1859 && REGNO (SET_DEST (pat)) <= LAST_FP_REG)
1860 && (GET_CODE (SET_SRC (pat)) != CONST_INT
1861 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
/* Recognize a mova insn: a SET whose source is UNSPEC 1.
   NOTE(review): the function header line is elided in this extraction.  */
1872 return (GET_CODE (insn) == INSN
1873 && GET_CODE (PATTERN (insn)) == SET
1874 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
1875 && XINT (SET_SRC (PATTERN (insn)), 1) == 1);
1878 /* Find the last barrier from insn FROM which is close enough to hold the
1879 constant pool. If we can't find one, then create one near the end of
/* Scans forward from FROM, tracking byte distances for HImode (count_hi)
   and SImode (count_si) pool references against their pc-relative range
   limits, alignment effects, and any pending mova (NUM_MOVA / MOVA).
   Returns the barrier to dump the pool after, creating one (jump around
   + barrier + label) if none is in range.
   NOTE(review): this extraction is missing lines (original numbering is
   non-contiguous); several declarations and braces are not visible.  */
1883 find_barrier (num_mova, mova, from)
1893 int leading_mova = num_mova;
1894 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
1898 /* For HImode: range is 510, add 4 because pc counts from address of
1899 second instruction after this one, subtract 2 for the jump instruction
1900 that we may need to emit before the table, subtract 2 for the instruction
1901 that fills the jump delay slot (in very rare cases, reorg will take an
1902 instruction from after the constant pool or will leave the delay slot
1903 empty). This gives 510.
1904 For SImode: range is 1020, add 4 because pc counts from address of
1905 second instruction after this one, subtract 2 in case pc is 2 byte
1906 aligned, subtract 2 for the jump instruction that we may need to emit
1907 before the table, subtract 2 for the instruction that fills the jump
1908 delay slot. This gives 1018. */
1910 /* The branch will always be shortened now that the reference address for
1911 forward branches is the successor address, thus we need no longer make
1912 adjustments to the [sh]i_limit for -O0. */
1917 while (from && count_si < si_limit && count_hi < hi_limit)
1919 int inc = get_attr_length (from);
1922 if (GET_CODE (from) == CODE_LABEL)
1925 new_align = 1 << label_to_alignment (from);
1926 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
1927 new_align = 1 << barrier_align (from);
1933 if (GET_CODE (from) == BARRIER)
1936 found_barrier = from;
1938 /* If we are at the end of the function, or in front of an alignment
1939 instruction, we need not insert an extra alignment. We prefer
1940 this kind of barrier. */
1941 if (barrier_align (from) > 2)
1942 good_barrier = from;
1945 if (broken_move (from))
1948 enum machine_mode mode;
1950 pat = PATTERN (from);
1951 if (GET_CODE (pat) == PARALLEL)
1952 pat = XVECEXP (pat, 0, 0);
1953 src = SET_SRC (pat);
1954 dst = SET_DEST (pat);
1955 mode = GET_MODE (dst);
1957 /* We must explicitly check the mode, because sometimes the
1958 front end will generate code to load unsigned constants into
1959 HImode targets without properly sign extending them. */
1960 if (mode == HImode || (mode == SImode && hi_const (src)))
1963 /* We put the short constants before the long constants, so
1964 we must count the length of short constants in the range
1965 for the long constants. */
1966 /* ??? This isn't optimal, but is easy to do. */
1971 while (si_align > 2 && found_si + si_align - 2 > count_si)
1973 if (found_si > count_si)
1974 count_si = found_si;
1975 found_si += GET_MODE_SIZE (mode);
1977 si_limit -= GET_MODE_SIZE (mode);
/* Remember the best barrier seen before the leading mova, so the pool
   can be placed where the mova label stays in range.  */
1987 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
1989 if (found_si > count_si)
1990 count_si = found_si;
1992 else if (GET_CODE (from) == JUMP_INSN
1993 && (GET_CODE (PATTERN (from)) == ADDR_VEC
1994 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
1998 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2000 /* We have just passed the barrier in front of the
2001 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2002 the ADDR_DIFF_VEC is accessed as data, just like our pool
2003 constants, this is a good opportunity to accommodate what
2004 we have gathered so far.
2005 If we waited any longer, we could end up at a barrier in
2006 front of code, which gives worse cache usage for separated
2007 instruction / data caches. */
2008 good_barrier = found_barrier;
2013 rtx body = PATTERN (from);
2014 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
/* Round the running byte counts up for any alignment increase;
   the unparenthesized &/- precedence here follows C defaults.  */
2020 if (new_align > si_align)
2022 si_limit -= count_si - 1 & new_align - si_align;
2023 si_align = new_align;
2025 count_si = count_si + new_align - 1 & -new_align;
2030 if (new_align > hi_align)
2032 hi_limit -= count_hi - 1 & new_align - hi_align;
2033 hi_align = new_align;
2035 count_hi = count_hi + new_align - 1 & -new_align;
2038 from = NEXT_INSN (from);
2044 /* Try as we might, the leading mova is out of range. Change
2045 it into a load (which will become a pcload) and retry. */
2046 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2047 INSN_CODE (mova) = -1;
2048 return find_barrier (0, 0, mova);
2052 /* Insert the constant pool table before the mova instruction,
2053 to prevent the mova label reference from going out of range. */
2055 good_barrier = found_barrier = barrier_before_mova;
2060 if (good_barrier && next_real_insn (found_barrier))
2061 found_barrier = good_barrier;
2065 /* We didn't find a barrier in time to dump our stuff,
2066 so we'll make one. */
2067 rtx label = gen_label_rtx ();
2069 /* If we exceeded the range, then we must back up over the last
2070 instruction we looked at. Otherwise, we just need to undo the
2071 NEXT_INSN at the end of the loop. */
2072 if (count_hi > hi_limit || count_si > si_limit)
2073 from = PREV_INSN (PREV_INSN (from));
2075 from = PREV_INSN (from);
2077 /* Walk back to be just before any jump or label.
2078 Putting it before a label reduces the number of times the branch
2079 around the constant pool table will be hit. Putting it before
2080 a jump makes it more likely that the bra delay slot will be
2082 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2083 || GET_CODE (from) == CODE_LABEL)
2084 from = PREV_INSN (from);
2086 from = emit_jump_insn_after (gen_jump (label), from);
2087 JUMP_LABEL (from) = label;
2088 LABEL_NUSES (label) = 1;
2089 found_barrier = emit_barrier_after (from);
2090 emit_label_after (label, found_barrier);
2093 return found_barrier;
2096 /* If the instruction INSN is implemented by a special function, and we can
2097 positively find the register that is used to call the sfunc, and this
2098 register is not used anywhere else in this instruction - except as the
2099 destination of a set, return this register; else, return 0. */
/* NOTE(review): lines are elided in this extraction; the SImode USE
   search presumably records the USE in reg_part before the loop below
   examines the rest of the PARALLEL — confirm against the full source.  */
2101 sfunc_uses_reg (insn)
2105 rtx pattern, part, reg_part, reg;
2107 if (GET_CODE (insn) != INSN)
2109 pattern = PATTERN (insn);
2110 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2113 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2115 part = XVECEXP (pattern, 0, i);
2116 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2121 reg = XEXP (reg_part, 0);
/* Reject the register if it is mentioned anywhere else in the pattern
   except as the destination of a SET.  */
2122 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2124 part = XVECEXP (pattern, 0, i);
2125 if (part == reg_part)
2127 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2128 && GET_CODE (SET_DEST (part)) == REG)
2129 ? SET_SRC (part) : part)))
2135 /* See if the only way in which INSN uses REG is by calling it, or by
2136 setting it while calling it. Set *SET to a SET rtx if the register
/* Returns nonzero if INSN uses REG other than as a call target (or as
   a plain SET destination, recorded through *SET).
   NOTE(review): this extraction is missing lines (original numbering is
   non-contiguous); the return statements between the visible checks are
   not all visible.  */
2140 noncall_uses_reg (reg, insn, set)
2149 reg2 = sfunc_uses_reg (insn);
2150 if (reg2 && REGNO (reg2) == REGNO (reg))
2152 pattern = single_set (insn);
2154 && GET_CODE (SET_DEST (pattern)) == REG
2155 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2159 if (GET_CODE (insn) != CALL_INSN)
2161 /* We don't use rtx_equal_p because we don't care if the mode is
2163 pattern = single_set (insn);
2165 && GET_CODE (SET_DEST (pattern)) == REG
2166 && REGNO (reg) == REGNO (SET_DEST (pattern)))
/* Non-call insn that SETs REG: any mention of REG in the source counts
   as a non-call use.  */
2172 par = PATTERN (insn);
2173 if (GET_CODE (par) == PARALLEL)
2174 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2176 part = XVECEXP (par, 0, i);
2177 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2180 return reg_mentioned_p (reg, SET_SRC (pattern));
2186 pattern = PATTERN (insn);
2188 if (GET_CODE (pattern) == PARALLEL)
2192 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2193 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2195 pattern = XVECEXP (pattern, 0, 0);
2198 if (GET_CODE (pattern) == SET)
2200 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2202 /* We don't use rtx_equal_p, because we don't care if the
2203 mode is different. */
2204 if (GET_CODE (SET_DEST (pattern)) != REG
2205 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2211 pattern = SET_SRC (pattern);
/* Finally, the call itself must be (call (mem REG) ...).  */
2214 if (GET_CODE (pattern) != CALL
2215 || GET_CODE (XEXP (pattern, 0)) != MEM
2216 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2222 /* Given a X, a pattern of an insn or a part of it, return a mask of used
2223 general registers. Bits 0..15 mean that the respective registers
2224 are used as inputs in the instruction. Bits 16..31 mean that the
2225 registers 0..15, respectively, are used as outputs, or are clobbered.
2226 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
/* NOTE(review): this extraction is missing lines (original numbering is
   non-contiguous); the switch cases and some returns are partly elided.  */
2228 regs_used (x, is_dest)
2237 code = GET_CODE (x);
/* A REG covers HARD_REGNO_NREGS consecutive registers; shift the
   contiguous bit mask to the register's position (plus 16 for dests).  */
2242 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2243 << (REGNO (x) + is_dest));
2247 rtx y = SUBREG_REG (x);
2249 if (GET_CODE (y) != REG)
2252 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2253 << (REGNO (y) + SUBREG_WORD (x) + is_dest));
2257 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2259 /* If there was a return value, it must have been indicated with USE. */
/* Generic recursion over the rtx format string for all other codes.  */
2272 fmt = GET_RTX_FORMAT (code);
2274 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2279 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2280 used |= regs_used (XVECEXP (x, i, j), is_dest);
2282 else if (fmt[i] == 'e')
2283 used |= regs_used (XEXP (x, i), is_dest);
2288 /* Create an instruction that prevents redirection of a conditional branch
2289 to the destination of the JUMP with address ADDR.
2290 If the branch needs to be implemented as an indirect jump, try to find
2291 a scratch register for it.
2292 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
2293 If any preceding insn that doesn't fit into a delay slot is good enough,
2294 pass 1. Pass 2 if a definite blocking insn is needed.
2295 -1 is used internally to avoid deep recursion.
2296 If a blocking instruction is made or recognized, return it. */
/* NOTE(review): this extraction is missing lines (original numbering is
   non-contiguous); several returns and braces are not visible.  */
2299 gen_block_redirect (jump, addr, need_block)
2301 int addr, need_block;
2304 rtx prev = prev_nonnote_insn (jump);
2307 /* First, check if we already have an instruction that satisfies our need. */
2308 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
2310 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2312 if (GET_CODE (PATTERN (prev)) == USE
2313 || GET_CODE (PATTERN (prev)) == CLOBBER
2314 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2316 else if ((need_block &= ~1) < 0)
2318 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
2321 /* We can't use JUMP_LABEL here because it might be undefined
2322 when not optimizing. */
2323 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
2324 /* If the branch is out of range, try to find a scratch register for it. */
/* The unsigned-add trick folds the signed range check into one compare.  */
2326 && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098))
2329 /* Don't look for the stack pointer as a scratch register,
2330 it would cause trouble if an interrupt occurred. */
2331 unsigned try = 0x7fff, used;
2332 int jump_left = flag_expensive_optimizations + 1;
2334 /* It is likely that the most recent eligible instruction is wanted for
2335 the delay slot. Therefore, find out which registers it uses, and
2336 try to avoid using them. */
2338 for (scan = jump; scan = PREV_INSN (scan); )
2342 if (INSN_DELETED_P (scan))
2344 code = GET_CODE (scan);
2345 if (code == CODE_LABEL || code == JUMP_INSN)
2348 && GET_CODE (PATTERN (scan)) != USE
2349 && GET_CODE (PATTERN (scan)) != CLOBBER
2350 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
2352 try &= ~regs_used (PATTERN (scan), 0);
/* Scan forward from the target, collecting used registers until a
   register that is set but no longer live ('dead') is found.  */
2356 for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); )
2360 if (INSN_DELETED_P (scan))
2362 code = GET_CODE (scan);
2363 if (GET_RTX_CLASS (code) == 'i')
2365 used |= regs_used (PATTERN (scan), 0);
2366 if (code == CALL_INSN)
2367 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
2368 dead |= (used >> 16) & ~used;
2374 if (code == JUMP_INSN)
2375 if (jump_left-- && simplejump_p (scan))
2376 scan = JUMP_LABEL (scan);
2381 /* Mask out the stack pointer again, in case it was
2382 the only 'free' register we have found. */
2385 /* If the immediate destination is still in range, check for possible
2386 threading with a jump beyond the delay slot insn.
2387 Don't check if we are called recursively; the jump has been or will be
2388 checked in a different invocation then. */
2390 else if (optimize && need_block >= 0)
2392 rtx next = next_active_insn (next_active_insn (dest));
2393 if (next && GET_CODE (next) == JUMP_INSN
2394 && GET_CODE (PATTERN (next)) == SET
2395 && recog_memoized (next) == CODE_FOR_jump)
2397 dest = JUMP_LABEL (next);
2399 && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)
2400 gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1);
/* dead & -dead isolates the lowest dead register bit for the scratch.  */
2406 rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead));
2408 /* It would be nice if we could convert the jump into an indirect
2409 jump / far branch right now, and thus exposing all constituent
2410 instructions to further optimization. However, reorg uses
2411 simplejump_p to determine if there is an unconditional jump where
2412 it should try to schedule instructions from the target of the
2413 branch; simplejump_p fails for indirect jumps even if they have
2415 rtx insn = emit_insn_before (gen_indirect_jump_scratch
2416 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
2418 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
2421 else if (need_block)
2422 /* We can't use JUMP_LABEL here because it might be undefined
2423 when not optimizing. */
2424 return emit_insn_before (gen_block_branch_redirect
2425 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
2430 #define CONDJUMP_MIN -252
2431 #define CONDJUMP_MAX 262
2434 /* A label (to be placed) in front of the jump
2435 that jumps to our ultimate destination. */
2437 /* Where we are going to insert it if we cannot move the jump any farther,
2438 or the jump itself if we have picked up an existing jump. */
2440 /* The ultimate destination. */
2442 struct far_branch *prev;
2443 /* If the branch has already been created, its address;
2444 else the address of its first prospective user. */
2448 enum mdep_reorg_phase_e mdep_reorg_phase;
/* Materialize a far branch described by BP: emit a label and an
   unconditional jump (or return) to the far destination after the insert
   place, invert the original conditional jump to target the new label,
   and emit a blocking insn so reorg cannot undo the split.
   NOTE(review): the function header line is elided in this extraction.  */
2451 struct far_branch *bp;
2453 rtx insn = bp->insert_place;
2455 rtx label = gen_label_rtx ();
2457 emit_label_after (label, insn);
2460 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
2461 LABEL_NUSES (bp->far_label)++;
/* No far_label means the ultimate destination is the function return.  */
2464 jump = emit_jump_insn_after (gen_return (), insn);
2465 emit_label_after (bp->near_label, insn);
2466 JUMP_LABEL (jump) = bp->far_label;
2467 if (! invert_jump (insn, label))
2469 /* Prevent reorg from undoing our splits. */
2470 gen_block_redirect (jump, bp->address += 2, 2);
2473 /* Fix up ADDR_DIFF_VECs. */
/* For each ADDR_DIFF_VEC jump table starting at FIRST, find the matching
   casesi_jump_2 insn (identified by a USE of the vector label) and rebase
   the table's reference label to the braf reference address.
   NOTE(review): lines are elided in this extraction (original numbering
   is non-contiguous).  */
2475 fixup_addr_diff_vecs (first)
2480 for (insn = first; insn; insn = NEXT_INSN (insn))
2482 rtx vec_lab, pat, prev, prevpat, x;
2484 if (GET_CODE (insn) != JUMP_INSN
2485 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
2487 pat = PATTERN (insn);
2488 vec_lab = XEXP (XEXP (pat, 0), 0);
2490 /* Search the matching casesi_jump_2. */
2491 for (prev = vec_lab; ; prev = PREV_INSN (prev))
2493 if (GET_CODE (prev) != JUMP_INSN)
2495 prevpat = PATTERN (prev)
2496 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
2498 x = XVECEXP (prevpat, 0, 1);
2499 if (GET_CODE (x) != USE)
2502 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
2505 /* Fix up the ADDR_DIF_VEC to be relative
2506 to the reference address of the braf. */
2507 XEXP (XEXP (pat, 0), 0)
2508 = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
2512 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
2513 a barrier. Return the base 2 logarithm of the desired alignment. */
/* NOTE(review): this extraction is missing lines (original numbering is
   non-contiguous); several return values between the visible checks are
   not visible.  */
2515 barrier_align (barrier_or_label)
2516 rtx barrier_or_label;
2518 rtx next = next_real_insn (barrier_or_label), pat, prev;
2524 pat = PATTERN (next);
2526 if (GET_CODE (pat) == ADDR_DIFF_VEC)
2529 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1)
2530 /* This is a barrier in front of a constant table. */
2533 prev = prev_real_insn (barrier_or_label);
2534 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
2536 pat = PATTERN (prev);
2537 /* If this is a very small table, we want to keep the alignment after
2538 the table to the minimum for proper code alignment. */
2539 return ((TARGET_SMALLCODE
2540 || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
2541 <= 1 << (CACHE_LOG - 2)))
2545 if (TARGET_SMALLCODE)
2548 if (! TARGET_SH3 || ! optimize)
2551 /* Check if there is an immediately preceding branch to the insn beyond
2552 the barrier. We must weight the cost of discarding useful information
2553 from the current cache line when executing this branch and there is
2554 an alignment, against that of fetching unneeded insn in front of the
2555 branch target when there is no alignment. */
2557 /* PREV is presumed to be the JUMP_INSN for the barrier under
2558 investigation. Skip to the insn before it. */
2559 prev = prev_real_insn (prev);
/* Walk back up to a cache-line-plus-slack budget of bytes ("credit"),
   skipping delay-slot fillers, to find a jump that lands right after
   the barrier.  */
2561 for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2;
2562 credit >= 0 && prev && GET_CODE (prev) == INSN;
2563 prev = prev_real_insn (prev))
2565 if (GET_CODE (PATTERN (prev)) == USE
2566 || GET_CODE (PATTERN (prev)) == CLOBBER)
2568 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2569 prev = XVECEXP (PATTERN (prev), 0, 1);
2571 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2573 credit -= get_attr_length (prev);
2576 && GET_CODE (prev) == JUMP_INSN
2577 && JUMP_LABEL (prev)
2578 && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label)
2579 && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0)))
2585 /* Exported to toplev.c.
2587 Do a final pass over the function, just before delayed branch
2591 machine_dependent_reorg (first)
2596 rtx r0_rtx = gen_rtx (REG, Pmode, 0);
2597 rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx);
2599 /* If relaxing, generate pseudo-ops to associate function calls with
2600 the symbols they call. It does no harm to not generate these
2601 pseudo-ops. However, when we can generate them, it enables to
2602 linker to potentially relax the jsr to a bsr, and eliminate the
2603 register load and, possibly, the constant pool entry. */
2605 mdep_reorg_phase = SH_INSERT_USES_LABELS;
2608 /* Remove all REG_LABEL notes. We want to use them for our own
2609 purposes. This works because none of the remaining passes
2610 need to look at them.
2612 ??? But it may break in the future. We should use a machine
2613 dependent REG_NOTE, or some other approach entirely. */
2614 for (insn = first; insn; insn = NEXT_INSN (insn))
2616 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2620 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
2621 remove_note (insn, note);
2625 for (insn = first; insn; insn = NEXT_INSN (insn))
2627 rtx pattern, reg, link, set, scan, dies, label;
2628 int rescan = 0, foundinsn = 0;
2630 if (GET_CODE (insn) == CALL_INSN)
2632 pattern = PATTERN (insn);
2634 if (GET_CODE (pattern) == PARALLEL)
2635 pattern = XVECEXP (pattern, 0, 0);
2636 if (GET_CODE (pattern) == SET)
2637 pattern = SET_SRC (pattern);
2639 if (GET_CODE (pattern) != CALL
2640 || GET_CODE (XEXP (pattern, 0)) != MEM)
2643 reg = XEXP (XEXP (pattern, 0), 0);
2647 reg = sfunc_uses_reg (insn);
2652 if (GET_CODE (reg) != REG)
2655 /* This is a function call via REG. If the only uses of REG
2656 between the time that it is set and the time that it dies
2657 are in function calls, then we can associate all the
2658 function calls with the setting of REG. */
2660 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
2662 if (REG_NOTE_KIND (link) != 0)
2664 set = single_set (XEXP (link, 0));
2665 if (set && rtx_equal_p (reg, SET_DEST (set)))
2667 link = XEXP (link, 0);
2674 /* ??? Sometimes global register allocation will have
2675 deleted the insn pointed to by LOG_LINKS. Try
2676 scanning backward to find where the register is set. */
2677 for (scan = PREV_INSN (insn);
2678 scan && GET_CODE (scan) != CODE_LABEL;
2679 scan = PREV_INSN (scan))
2681 if (GET_RTX_CLASS (GET_CODE (scan)) != 'i')
2684 if (! reg_mentioned_p (reg, scan))
2687 if (noncall_uses_reg (reg, scan, &set))
2701 /* The register is set at LINK. */
2703 /* We can only optimize the function call if the register is
2704 being set to a symbol. In theory, we could sometimes
2705 optimize calls to a constant location, but the assembler
2706 and linker do not support that at present. */
2707 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
2708 && GET_CODE (SET_SRC (set)) != LABEL_REF)
2711 /* Scan forward from LINK to the place where REG dies, and
2712 make sure that the only insns which use REG are
2713 themselves function calls. */
2715 /* ??? This doesn't work for call targets that were allocated
2716 by reload, since there may not be a REG_DEAD note for the
2720 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
2724 /* Don't try to trace forward past a CODE_LABEL if we haven't
2725 seen INSN yet. Ordinarily, we will only find the setting insn
2726 in LOG_LINKS if it is in the same basic block. However,
2727 cross-jumping can insert code labels in between the load and
2728 the call, and can result in situations where a single call
2729 insn may have two targets depending on where we came from. */
2731 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
2734 if (GET_RTX_CLASS (GET_CODE (scan)) != 'i')
2737 /* Don't try to trace forward past a JUMP. To optimize
2738 safely, we would have to check that all the
2739 instructions at the jump destination did not use REG. */
2741 if (GET_CODE (scan) == JUMP_INSN)
2744 if (! reg_mentioned_p (reg, scan))
2747 if (noncall_uses_reg (reg, scan, &scanset))
2754 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
2756 /* There is a function call to this register other
2757 than the one we are checking. If we optimize
2758 this call, we need to rescan again below. */
2762 /* ??? We shouldn't have to worry about SCANSET here.
2763 We should just be able to check for a REG_DEAD note
2764 on a function call. However, the REG_DEAD notes are
2765 apparently not dependable around libcalls; c-torture
2766 execute/920501-2 is a test case. If SCANSET is set,
2767 then this insn sets the register, so it must have
2768 died earlier. Unfortunately, this will only handle
2769 the cases in which the register is, in fact, set in a
2772 /* ??? We shouldn't have to use FOUNDINSN here.
2773 However, the LOG_LINKS fields are apparently not
2774 entirely reliable around libcalls;
2775 newlib/libm/math/e_pow.c is a test case. Sometimes
2776 an insn will appear in LOG_LINKS even though it is
2777 not the most recent insn which sets the register. */
2781 || find_reg_note (scan, REG_DEAD, reg)))
2790 /* Either there was a branch, or some insn used REG
2791 other than as a function call address. */
2795 /* Create a code label, and put it in a REG_LABEL note on
2796 the insn which sets the register, and on each call insn
2797 which uses the register. In final_prescan_insn we look
2798 for the REG_LABEL notes, and output the appropriate label
2801 label = gen_label_rtx ();
2802 REG_NOTES (link) = gen_rtx (EXPR_LIST, REG_LABEL, label,
2804 REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label,
2813 scan = NEXT_INSN (scan);
2815 && ((GET_CODE (scan) == CALL_INSN
2816 && reg_mentioned_p (reg, scan))
2817 || ((reg2 = sfunc_uses_reg (scan))
2818 && REGNO (reg2) == REGNO (reg))))
2819 REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL,
2820 label, REG_NOTES (scan));
2822 while (scan != dies);
2828 fixup_addr_diff_vecs (first);
2832 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
2833 shorten_branches (first);
2835 /* Scan the function looking for move instructions which have to be
2836 changed to pc-relative loads and insert the literal tables. */
2838 mdep_reorg_phase = SH_FIXUP_PCLOAD;
2839 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
2846 else if (GET_CODE (insn) == JUMP_INSN
2847 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
2855 /* Some code might have been inserted between the mova and
2856 its ADDR_DIFF_VEC. Check if the mova is still in range. */
2857 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
2858 total += get_attr_length (scan);
2860 /* range of mova is 1020, add 4 because pc counts from address of
2861 second instruction after this one, subtract 2 in case pc is 2
2862 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
2863 cancels out with alignment effects of the mova itself. */
2866 /* Change the mova into a load, and restart scanning
2867 there. broken_move will then return true for mova. */
2868 SET_SRC (PATTERN (mova))
2869 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2870 INSN_CODE (mova) = -1;
2874 if (broken_move (insn))
2877 /* Scan ahead looking for a barrier to stick the constant table
2879 rtx barrier = find_barrier (num_mova, mova, insn);
2880 rtx last_float_move, last_float = 0, *last_float_addr;
2882 if (num_mova && ! mova_p (mova))
2884 /* find_barrier had to change the first mova into a
2885 pcload; thus, we have to start with this new pcload. */
2889 /* Now find all the moves between the points and modify them. */
2890 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
2892 if (GET_CODE (scan) == CODE_LABEL)
2894 if (broken_move (scan))
2896 rtx *patp = &PATTERN (scan), pat = *patp;
2901 enum machine_mode mode;
2903 if (GET_CODE (pat) == PARALLEL)
2904 patp = &XVECEXP (pat, 0, 0), pat = *patp;
2905 src = SET_SRC (pat);
2906 dst = SET_DEST (pat);
2907 mode = GET_MODE (dst);
2909 if (mode == SImode && hi_const (src))
2914 while (GET_CODE (dst) == SUBREG)
2916 offset += SUBREG_WORD (dst);
2917 dst = SUBREG_REG (dst);
2919 dst = gen_rtx (REG, HImode, REGNO (dst) + offset);
2922 if (GET_CODE (dst) == REG
2923 && ((REGNO (dst) >= FIRST_FP_REG
2924 && REGNO (dst) <= LAST_FP_REG)
2925 || REGNO (dst) == FPUL_REG))
2928 && reg_set_between_p (r0_rtx, last_float_move, scan))
2930 lab = add_constant (src, mode, last_float);
2932 emit_insn_before (gen_mova (lab), scan);
2934 *last_float_addr = r0_inc_rtx;
2935 last_float_move = scan;
2937 newsrc = gen_rtx (MEM, mode,
2938 (REGNO (dst) == FPUL_REG
2941 last_float_addr = &XEXP (newsrc, 0);
2945 lab = add_constant (src, mode, 0);
2946 newsrc = gen_rtx (MEM, mode,
2947 gen_rtx (LABEL_REF, VOIDmode, lab));
2949 RTX_UNCHANGING_P (newsrc) = 1;
2950 *patp = gen_rtx (SET, VOIDmode, dst, newsrc);
2951 INSN_CODE (scan) = -1;
2954 dump_table (barrier);
2959 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
2961 split_branches (first);
2963 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
2964 also has an effect on the register that holds the addres of the sfunc.
2965 Insert an extra dummy insn in front of each sfunc that pretends to
2966 use this register. */
2967 if (flag_delayed_branch)
2969 for (insn = first; insn; insn = NEXT_INSN (insn))
2971 rtx reg = sfunc_uses_reg (insn);
2975 emit_insn_before (gen_use_sfunc_addr (reg), insn);
2978 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
/* Return the INSN_UID of the branch destination reached through LABEL.
   Skips forward past branch-redirection blocking insns created by this
   pass (their uids are >= MAX_UID, so they cannot index the per-uid
   arrays); the real destination is the branch that follows them.  */
2982 get_dest_uid (label, max_uid)
2986 rtx dest = next_real_insn (label);
2989 /* This can happen for an undefined label. */
2991 dest_uid = INSN_UID (dest);
2992 /* If this is a newly created branch redirection blocking instruction,
2993 we cannot index the branch_uid or insn_addresses arrays with its
2994 uid. But then, we won't need to, because the actual destination is
2995 the following branch. */
2996 while (dest_uid >= max_uid)
2998 dest = NEXT_INSN (dest);
2999 dest_uid = INSN_UID (dest);
/* NOTE(review): a jump whose pattern is RETURN is treated specially here —
   the elided tail presumably returns a distinguished uid; confirm against
   the unelided source.  */
3001 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3006 /* Split condbranches that are out of range. Also add clobbers for
3007 scratch registers that are needed in far jumps.
3008 We do this before delay slot scheduling, so that it can take our
3009 newly created instructions into account. It also allows us to
3010 find branches with common targets more easily. */
/* Split conditional branches that are out of range and add clobbers for
   the scratch registers needed by far jumps (see the comment above).
   FIRST is the head of the insn chain.  Runs before delay-slot
   scheduling so the new insns are taken into account.  */
3013 split_branches (first)
3017 struct far_branch **uid_branch, *far_branch_list = 0;
3018 int max_uid = get_max_uid ();
3020 /* Find out which branches are out of range. */
3021 shorten_branches (first);
/* Per-destination-uid table of pending far branches; alloca'd, so it
   lives only for this call.  */
3023 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3024 bzero ((char *) uid_branch, max_uid * sizeof *uid_branch);
3026 for (insn = first; insn; insn = NEXT_INSN (insn))
3027 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
3029 else if (INSN_DELETED_P (insn))
3031 /* Shorten_branches would split this instruction again,
3032 so transform it into a note. */
3033 PUT_CODE (insn, NOTE);
3034 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3035 NOTE_SOURCE_FILE (insn) = 0;
3037 else if (GET_CODE (insn) == JUMP_INSN
3038 /* Don't mess with ADDR_DIFF_VEC */
3039 && (GET_CODE (PATTERN (insn)) == SET
3040 || GET_CODE (PATTERN (insn)) == RETURN))
3042 enum attr_type type = get_attr_type (insn);
/* Conditional branch: if shorten_branches says it is longer than 4
   bytes, it is out of range and must be redirected through a nearby
   label / far-branch stub.  */
3043 if (type == TYPE_CBRANCH)
3047 if (get_attr_length (insn) > 4)
3049 rtx src = SET_SRC (PATTERN (insn));
3050 rtx cond = XEXP (src, 0);
3051 rtx olabel = XEXP (XEXP (src, 1), 0);
3053 int addr = insn_addresses[INSN_UID (insn)];
3055 int dest_uid = get_dest_uid (olabel, max_uid);
3056 struct far_branch *bp = uid_branch[dest_uid];
3058 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3059 the label if the LABEL_NUSES count drops to zero. There is
3060 always a jump_optimize pass that sets these values, but it
3061 proceeds to delete unreferenced code, and then if not
3062 optimizing, to un-delete the deleted instructions, thus
3063 leaving labels with too low uses counts. */
3066 JUMP_LABEL (insn) = olabel;
3067 LABEL_NUSES (olabel)++;
/* No pending far branch for this destination yet — record one.  */
3071 bp = (struct far_branch *) alloca (sizeof *bp);
3072 uid_branch[dest_uid] = bp;
3073 bp->prev = far_branch_list;
3074 far_branch_list = bp;
3076 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3077 LABEL_NUSES (bp->far_label)++;
3081 label = bp->near_label;
/* Reuse or materialize a near label within CONDJUMP range of INSN.  */
3082 if (! label && bp->address - addr >= CONDJUMP_MIN)
3084 rtx block = bp->insert_place;
3086 if (GET_CODE (PATTERN (block)) == RETURN)
3087 block = PREV_INSN (block);
3089 block = gen_block_redirect (block,
3091 label = emit_label_after (gen_label_rtx (),
3093 bp->near_label = label;
3095 else if (label && ! NEXT_INSN (label))
3096 if (addr + 2 - bp->address <= CONDJUMP_MAX)
3097 bp->insert_place = insn;
3099 gen_far_branch (bp);
3102 || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)
3104 bp->near_label = label = gen_label_rtx ();
3105 bp->insert_place = insn;
3108 if (! redirect_jump (insn, label))
3113 /* get_attr_length (insn) == 2 */
3114 /* Check if we have a pattern where reorg wants to redirect
3115 the branch to a label from an unconditional branch that
3117 /* We can't use JUMP_LABEL here because it might be undefined
3118 when not optimizing. */
3119 /* A syntax error might cause beyond to be NULL_RTX. */
3121 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
3125 && (GET_CODE (beyond) == JUMP_INSN
3126 || (GET_CODE (beyond = next_active_insn (beyond))
3128 && GET_CODE (PATTERN (beyond)) == SET
3129 && recog_memoized (beyond) == CODE_FOR_jump
3130 && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))]
3131 - insn_addresses[INSN_UID (insn)] + 252U)
3133 gen_block_redirect (beyond,
3134 insn_addresses[INSN_UID (beyond)], 1);
3137 next = next_active_insn (insn);
/* Same check for the insn that directly follows this branch.  */
3139 if ((GET_CODE (next) == JUMP_INSN
3140 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
3141 && GET_CODE (PATTERN (next)) == SET
3142 && recog_memoized (next) == CODE_FOR_jump
3143 && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))]
3144 - insn_addresses[INSN_UID (insn)] + 252U)
3146 gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1);
/* Unconditional jump or return: may also need a far-branch stub.  */
3148 else if (type == TYPE_JUMP || type == TYPE_RETURN)
3150 int addr = insn_addresses[INSN_UID (insn)];
3153 struct far_branch *bp;
3155 if (type == TYPE_JUMP)
3157 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
3158 dest_uid = get_dest_uid (far_label, max_uid);
3161 /* Parse errors can lead to labels outside
3163 if (! NEXT_INSN (far_label))
3168 JUMP_LABEL (insn) = far_label;
3169 LABEL_NUSES (far_label)++;
3171 redirect_jump (insn, NULL_RTX);
3175 bp = uid_branch[dest_uid];
3178 bp = (struct far_branch *) alloca (sizeof *bp);
3179 uid_branch[dest_uid] = bp;
3180 bp->prev = far_branch_list;
3181 far_branch_list = bp;
3183 bp->far_label = far_label;
3185 LABEL_NUSES (far_label)++;
3187 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
3188 if (addr - bp->address <= CONDJUMP_MAX)
3189 emit_label_after (bp->near_label, PREV_INSN (insn));
3192 gen_far_branch (bp);
3198 bp->insert_place = insn;
3200 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
3202 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
3205 /* Generate all pending far branches,
3206 and free our references to the far labels. */
3207 while (far_branch_list)
3209 if (far_branch_list->near_label
3210 && ! NEXT_INSN (far_branch_list->near_label))
3211 gen_far_branch (far_branch_list);
3213 && far_branch_list->far_label
3214 && ! --LABEL_NUSES (far_branch_list->far_label))
3215 delete_insn (far_branch_list->far_label);
3216 far_branch_list = far_branch_list->prev;
3219 /* Instruction length information is no longer valid due to the new
3220 instructions that have been generated. */
3221 init_insn_lengths ();
3224 /* Dump out instruction addresses, which is useful for debugging the
3225 constant pool table stuff.
3227 If relaxing, output the label and pseudo-ops used to link together
3228 calls and the instruction which set the registers. */
3230 /* ??? This is unnecessary, and probably should be deleted. This makes
3231 the insn_addresses declaration above unnecessary. */
3233 /* ??? The addresses printed by this routine for insns are nonsense for
3234 insns which are inside of a sequence where none of the inner insns have
3235 variable length. This is because the second pass of shorten_branches
3236 does not bother to update them. */
/* Called by final before each insn is output.  With -mdumpisize, dump
   the insn's address as an assembler comment.  When relaxing, emit the
   .uses pseudo-op (for calls/sfuncs) or the label (for the setting
   insn) recorded via REG_LABEL notes by machine_dependent_reorg, so
   the linker can relax the call sequence.  */
3239 final_prescan_insn (insn, opvec, noperands)
3244 if (TARGET_DUMPISIZE)
3245 fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]);
3251 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3256 pattern = PATTERN (insn);
/* For a PARALLEL, the interesting part is the first element.  */
3257 if (GET_CODE (pattern) == PARALLEL)
3258 pattern = XVECEXP (pattern, 0, 0);
3259 if (GET_CODE (pattern) == CALL
3260 || (GET_CODE (pattern) == SET
3261 && (GET_CODE (SET_SRC (pattern)) == CALL
3262 || get_attr_type (insn) == TYPE_SFUNC)))
3263 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
3264 CODE_LABEL_NUMBER (XEXP (note, 0)));
3265 else if (GET_CODE (pattern) == SET)
3266 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3267 CODE_LABEL_NUMBER (XEXP (note, 0)));
3274 /* Dump out any constants accumulated in the final pass. These will
/* Emit any constants accumulated in pool_vector during the final pass:
   a .align 2 followed by one internal label and one .long per entry.  */
3278 output_jump_label_table ()
3284 fprintf (asm_out_file, "\t.align 2\n");
3285 for (i = 0; i < pool_size; i++)
3287 pool_node *p = &pool_vector[i];
3289 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3290 CODE_LABEL_NUMBER (p->label));
3291 output_asm_insn (".long %O0", &p->value);
3299 /* A full frame looks like:
3303 [ if current_function_anonymous_args
3316 local-0 <- fp points here. */
3318 /* Number of bytes pushed for anonymous args, used to pass information
3319 between expand_prologue and expand_epilogue. */
3321 static int extra_push;
3323 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
3324 to be adjusted, and TEMP, if nonnegative, holds the register number
3325 of a general register that we may clobber. */
/* Emit insns to adjust register REG by SIZE bytes.  TEMP, if
   nonnegative, is the number of a general register we may clobber to
   hold the constant when SIZE does not fit an immediate add.  */
3328 output_stack_adjust (size, reg, temp)
/* Small constant: a single immediate add suffices.  */
3335 if (CONST_OK_FOR_I (size))
3336 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
3337 /* Try to do it with two partial adjustments; however, we must make
3338 sure that the stack is properly aligned at all times, in case
3339 an interrupt occurs between the two partial adjustments. */
3340 else if (CONST_OK_FOR_I (size / 2 & -4)
3341 && CONST_OK_FOR_I (size - (size / 2 & -4)))
3343 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
3344 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
3350 /* If TEMP is invalid, we could temporarily save a general
3351 register to MACL. However, there is currently no need
3352 to handle this case, so just abort when we see it. */
3355 const_reg = gen_rtx (REG, SImode, temp);
3357 /* If SIZE is negative, subtract the positive value.
3358 This sometimes allows a constant pool entry to be shared
3359 between prologue and epilogue code. */
3362 emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
3363 emit_insn (gen_subsi3 (reg, reg, const_reg));
3367 emit_insn (gen_movsi (const_reg, GEN_INT (size)));
3368 emit_insn (gen_addsi3 (reg, reg, const_reg));
3374 /* Output RTL to push register RN onto the stack. */
/* Emit a push of register RN: push_e (SFmode) for floating-point
   registers, plain push (SImode) otherwise, and attach a REG_INC note
   for the stack pointer so reorg knows SP is auto-modified.  */
3381 if ((rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
3383 x = gen_push_e (gen_rtx (REG, SFmode, rn));
3385 x = gen_push (gen_rtx (REG, SImode, rn));
3388 REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
3389 gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
3392 /* Output RTL to pop register RN from the stack. */
/* Emit a pop of register RN — the mirror of push above: pop_e for
   floating-point registers, plain pop otherwise, plus a REG_INC note
   for the stack pointer.  */
3399 if ((rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
3401 x = gen_pop_e (gen_rtx (REG, SFmode, rn));
3403 x = gen_pop (gen_rtx (REG, SImode, rn));
3406 REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
3407 gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
3410 /* Generate code to push the regs specified in the mask. */
/* Push every register selected in MASK (regs 0..31) and MASK2
   (regs 32 and up, bit i selects reg 32+i).  PR is deliberately pushed
   last — see the comment below.  */
3413 push_regs (mask, mask2)
3418 /* Push PR last; this gives better latencies after the prologue, and
3419 candidates for the return delay slot when there are no general
3420 registers pushed. */
3421 for (i = 0; i < 32; i++)
3422 if (mask & (1 << i) && i != PR_REG)
3424 for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
3425 if (mask2 & (1 << (i - 32)))
3427 if (mask & (1 << PR_REG))
3431 /* Work out the registers which need to be saved, both as a mask and a
3432 count of saved words.
3434 If doing a pragma interrupt function, then push all regs used by the
3435 function, and if we call another function (we can tell by looking at PR),
3436 make sure that all the regs it clobbers are safe too. */
/* Compute the set of registers the current function must save.
   Returns the mask for regs 0..31; stores the mask for regs 32+ in
   *LIVE_REGS_MASK2 and the count of saved words in *COUNT_PTR.
   For a #pragma interrupt function (without #pragma trapa), every
   register that could be clobbered must be saved, including
   call-clobbered regs if the function makes calls (PR live).  */
3439 calc_live_regs (count_ptr, live_regs_mask2)
3441 int *live_regs_mask2;
3444 int live_regs_mask = 0;
3447 *live_regs_mask2 = 0;
/* Scan from the highest register downward.  */
3448 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
3450 if ((pragma_interrupt && ! pragma_trapa)
3451 ? (/* Need to save all the regs ever live. */
3452 (regs_ever_live[reg]
3453 || (call_used_regs[reg]
3454 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
3455 && regs_ever_live[PR_REG]))
3456 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
3457 && reg != RETURN_ADDRESS_POINTER_REGNUM
3458 && reg != T_REG && reg != GBR_REG)
3459 : (/* Only push those regs which are used and need to be saved. */
3460 regs_ever_live[reg] && ! call_used_regs[reg]))
3463 *live_regs_mask2 |= 1 << (reg - 32);
3465 live_regs_mask |= 1 << reg;
3471 return live_regs_mask;
3474 /* Code to generate prologue and epilogue sequences */
/* Expand the function prologue: allocate pretend-arg space, spill the
   anonymous-arg registers for varargs functions, optionally switch
   stacks (sp_switch attribute), push the live registers, allocate the
   local frame, and set up the frame pointer if needed.  */
3477 sh_expand_prologue ()
3481 int live_regs_mask2;
3482 int double_align = 0;
3484 /* We have pretend args if we had an object sent partially in registers
3485 and partially on the stack, e.g. a large structure. */
3486 output_stack_adjust (-current_function_pretend_args_size,
3487 stack_pointer_rtx, 3);
3491 /* This is set by SETUP_VARARGS to indicate that this is a varargs
3492 routine. Clear it here so that the next function isn't affected. */
3493 if (current_function_anonymous_args)
3495 current_function_anonymous_args = 0;
3497 /* This is not used by the SH3E calling convention */
3500 /* Push arg regs as if they'd been provided by caller in stack. */
3501 for (i = 0; i < NPARM_REGS(SImode); i++)
3503 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
3504 if (i >= (NPARM_REGS(SImode)
3505 - current_function_args_info.arg_count[(int) SH_ARG_INT]
3514 /* If we're supposed to switch stacks at function entry, do so now. */
3516 emit_insn (gen_sp_switch_1 ());
3518 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
3519 push_regs (live_regs_mask, live_regs_mask2);
/* Keep the frame 8-byte aligned when an odd number of words was
   pushed and -malign-double is in effect.  */
3521 if (TARGET_ALIGN_DOUBLE && d & 1)
3524 output_stack_adjust (-get_frame_size () - double_align,
3525 stack_pointer_rtx, 3);
3527 if (frame_pointer_needed)
3528 emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
/* Expand the function epilogue: deallocate the local frame (via the
   frame pointer if one was used), pop the saved registers in reverse
   order (PR first), drop the extra_push/pretend-arg space, and switch
   back from an alternate interrupt stack if necessary.  */
3532 sh_expand_epilogue ()
3537 int live_regs_mask2;
3538 int frame_size = get_frame_size ();
3540 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
/* Mirror the prologue's -malign-double padding.  */
3542 if (TARGET_ALIGN_DOUBLE && d & 1)
3545 if (frame_pointer_needed)
3547 output_stack_adjust (frame_size, frame_pointer_rtx, 7);
3549 /* We must avoid moving the stack pointer adjustment past code
3550 which reads from the local frame, else an interrupt could
3551 occur after the SP adjustment and clobber data in the local
3553 emit_insn (gen_blockage ());
3554 emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
3556 else if (frame_size)
3558 /* We must avoid moving the stack pointer adjustment past code
3559 which reads from the local frame, else an interrupt could
3560 occur after the SP adjustment and clobber data in the local
3562 emit_insn (gen_blockage ());
3563 output_stack_adjust (frame_size, stack_pointer_rtx, 7);
3566 /* Pop all the registers. */
3568 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
3569 if (live_regs_mask & (1 << PR_REG))
3571 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3573 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
3574 if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
3576 else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
3580 output_stack_adjust (extra_push + current_function_pretend_args_size,
3581 stack_pointer_rtx, 7);
3583 /* Switch back to the normal stack if necessary. */
3585 emit_insn (gen_sp_switch_2 ());
3588 /* Clear variables at function end. */
/* Reset the per-function pragma/attribute state (trap_exit,
   interrupt/trapa pragmas, sp_switch) so it does not leak into the
   next function compiled.  */
3591 function_epilogue (stream, size)
3595 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
3596 sp_switch = NULL_RTX;
/* Implement __builtin_saveregs for SH varargs: allocate a stack buffer
   holding the unnamed float registers followed by the unnamed integer
   registers, spill them into it, and return the buffer's address.  */
3600 sh_builtin_saveregs (arglist)
3603 tree fntype = TREE_TYPE (current_function_decl);
3604 /* First unnamed integer register. */
3605 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
3606 /* Number of integer registers we need to save. */
3607 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
3608 /* First unnamed SFmode float reg */
3609 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
3610 /* Number of SFmode float regs to save. */
3611 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
3612 int ptrsize = GET_MODE_SIZE (Pmode);
3613 rtx valist, regbuf, fpregs;
3616 /* Allocate block of memory for the regs. */
3617 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
3618 Or can assign_stack_local accept a 0 SIZE argument? */
3619 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
3621 regbuf = assign_stack_local (BLKmode, bufsize, 0);
3622 MEM_IN_STRUCT_P (regbuf) = 1;
/* Integer regs go after the float regs in the buffer.  */
3625 This is optimized to only save the regs that are necessary. Explicitly
3626 named args need not be saved. */
3628 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
3629 gen_rtx (MEM, BLKmode,
3630 plus_constant (XEXP (regbuf, 0),
3631 n_floatregs * UNITS_PER_WORD)),
3632 n_intregs, n_intregs * UNITS_PER_WORD);
3635 This is optimized to only save the regs that are necessary. Explicitly
3636 named args need not be saved.
3637 We explicitly build a pointer to the buffer because it halves the insn
3638 count when not optimizing (otherwise the pointer is built for each reg
3640 We emit the moves in reverse order so that we can use predecrement. */
3642 fpregs = gen_reg_rtx (Pmode);
3643 emit_move_insn (fpregs, XEXP (regbuf, 0));
3644 emit_insn (gen_addsi3 (fpregs, fpregs,
3645 GEN_INT (n_floatregs * UNITS_PER_WORD)));
3646 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
3648 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
3649 emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
3650 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno));
3653 /* Return the address of the regbuf. */
3654 return XEXP (regbuf, 0);
3657 /* Define the offset between two registers, one to be eliminated, and
3658 the other its replacement, at the start of a routine. */
/* Return the initial offset between register FROM (to be eliminated)
   and register TO (its replacement) at function entry, based on the
   saved-register space and the local frame size.  target_flags is
   saved/restored around calc_live_regs — NOTE(review): presumably
   because calc_live_regs can alter it; confirm in the unelided source.  */
3661 initial_elimination_offset (from, to)
3666 int total_saved_regs_space;
3667 int total_auto_space = get_frame_size ();
3668 int save_flags = target_flags;
3670 int live_regs_mask, live_regs_mask2;
3671 live_regs_mask = calc_live_regs (&regs_saved, &live_regs_mask2);
3672 target_flags = save_flags;
/* Each saved register occupies 4 bytes.  */
3674 total_saved_regs_space = (regs_saved) * 4;
3676 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
3677 return total_saved_regs_space + total_auto_space;
3679 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3680 return total_saved_regs_space + total_auto_space;
3682 /* Initial gap between fp and sp is 0. */
3683 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
/* The return address is saved together with registers below PR; count
   the saved registers with numbers lower than PR to find its slot.  */
3686 if (from == RETURN_ADDRESS_POINTER_REGNUM
3687 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
3689 int i, n = total_saved_regs_space;
3690 for (i = PR_REG-1; i >= 0; i--)
3691 if (live_regs_mask & (1 << i))
3693 return n + total_auto_space;
3699 /* Handle machine specific pragmas to be semi-compatible with Hitachi
/* Handle the Hitachi-compatible pragmas "interrupt", "trapa" and
   "nosave_low_regs" by setting the corresponding file-static flags;
   RETVAL becomes nonzero when the pragma was recognized.  */
3703 sh_handle_pragma (p_getc, p_ungetc, pname)
3704 int (* p_getc) PROTO((void));
3705 void (* p_ungetc) PROTO((int));
3710 if (strcmp (pname, "interrupt") == 0)
3711 pragma_interrupt = retval = 1;
3712 else if (strcmp (pname, "trapa") == 0)
3713 pragma_interrupt = pragma_trapa = retval = 1;
3714 else if (strcmp (pname, "nosave_low_regs") == 0)
3715 pragma_nosave_low_regs = retval = 1;
3719 /* Return nonzero if ATTR is a valid attribute for DECL.
3720 ATTRIBUTES are any existing attributes and ARGS are the arguments
3723 Supported attributes:
3725 interrupt_handler -- specifies this function is an interrupt handler.
3727 sp_switch -- specifies an alternate stack for an interrupt handler
3730 trap_exit -- use a trapa to exit an interrupt function instead of
3731 an rte instruction. */
/* Validate the SH-specific function attributes (see the comment
   above): interrupt_handler, sp_switch (takes a string naming the
   alternate-stack variable) and trap_exit (takes an integer trap
   number).  Only valid on FUNCTION_DECLs; sp_switch/trap_exit further
   require that the function is already an interrupt handler.  */
3734 sh_valid_machine_decl_attribute (decl, attributes, attr, args)
3742 if (TREE_CODE (decl) != FUNCTION_DECL)
3745 if (is_attribute_p ("interrupt_handler", attr))
3747 pragma_interrupt = 1;
3751 if (is_attribute_p ("sp_switch", attr))
3753 /* The sp_switch attribute only has meaning for interrupt functions. */
3754 if (!pragma_interrupt)
3757 /* sp_switch must have an argument. */
3758 if (!args || TREE_CODE (args) != TREE_LIST)
3761 /* The argument must be a constant string. */
3762 if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
3765 sp_switch = gen_rtx (SYMBOL_REF, VOIDmode,
3766 TREE_STRING_POINTER (TREE_VALUE (args)));
3770 if (is_attribute_p ("trap_exit", attr))
3772 /* The trap_exit attribute only has meaning for interrupt functions. */
3773 if (!pragma_interrupt)
3776 /* trap_exit must have an argument. */
3777 if (!args || TREE_CODE (args) != TREE_LIST)
3780 /* The argument must be a constant integer. */
3781 if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
3784 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
3790 /* Predicates used by the templates. */
3792 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
3793 Used only in general_movsrc_operand. */
/* Predicate: nonzero iff OP is one of the system registers MACL, MACH
   or PR (per the comment above); OP must already be a REG rtx.  */
3796 system_reg_operand (op, mode)
3798 enum machine_mode mode;
3810 /* Returns 1 if OP can be source of a simple move operation.
3811 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
3812 invalid as are subregs of system registers. */
/* Predicate for valid move sources: like general_operand, but also
   accepts label-relative MEMs (plain LABEL_REF, or LABEL_REF plus a
   constant), rejects PRE_DEC addressing (only post-increment is
   allowed on loads), and rejects QI/HImode subregs of system
   registers.  */
3815 general_movsrc_operand (op, mode)
3817 enum machine_mode mode;
3819 if (GET_CODE (op) == MEM)
3821 rtx inside = XEXP (op, 0);
3822 if (GET_CODE (inside) == CONST)
3823 inside = XEXP (inside, 0);
3825 if (GET_CODE (inside) == LABEL_REF)
3828 if (GET_CODE (inside) == PLUS
3829 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
3830 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
3833 /* Only post inc allowed. */
3834 if (GET_CODE (inside) == PRE_DEC)
3838 if ((mode == QImode || mode == HImode)
3839 && (GET_CODE (op) == SUBREG
3840 && GET_CODE (XEXP (op, 0)) == REG
3841 && system_reg_operand (XEXP (op, 0), mode)))
/* Anything else falls back to the generic test.  */
3844 return general_operand (op, mode);
3847 /* Returns 1 if OP can be a destination of a move.
3848 Same as general_operand, but no preinc allowed. */
/* Predicate for valid move destinations: like general_operand, but
   POST_INC addressing is rejected (only pre-decrement is allowed on
   stores).  */
3851 general_movdst_operand (op, mode)
3853 enum machine_mode mode;
3855 /* Only pre dec allowed. */
3856 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
3859 return general_operand (op, mode);
3862 /* Returns 1 if OP is a normal arithmetic register. */
/* Predicate: OP is a normal arithmetic register — a register operand
   (REG or SUBREG of REG) that is none of the special registers T, PR,
   FPUL, MACH or MACL.  */
3865 arith_reg_operand (op, mode)
3867 enum machine_mode mode;
3869 if (register_operand (op, mode))
3873 if (GET_CODE (op) == REG
3875 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
3876 regno = REGNO (SUBREG_REG (op));
3880 return (regno != T_REG && regno != PR_REG && regno != FPUL_REG
3881 && regno != MACH_REG && regno != MACL_REG);
3886 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
/* Predicate: OP is a valid arithmetic source — an arithmetic register
   or a CONST_INT fitting the 'I' immediate constraint.  */
3889 arith_operand (op, mode)
3891 enum machine_mode mode;
3893 if (arith_reg_operand (op, mode))
3896 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
3902 /* Returns 1 if OP is a valid source operand for a compare insn. */
/* Predicate: OP is a valid compare source — an arithmetic register or
   a CONST_INT fitting the 'N' constraint (zero).  */
3905 arith_reg_or_0_operand (op, mode)
3907 enum machine_mode mode;
3909 if (arith_reg_operand (op, mode))
3912 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
3918 /* Returns 1 if OP is a valid source operand for a logical operation. */
/* Predicate: OP is a valid logical-operation source — an arithmetic
   register or a CONST_INT fitting the 'L' immediate constraint.  */
3921 logical_operand (op, mode)
3923 enum machine_mode mode;
3925 if (arith_reg_operand (op, mode))
3928 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
3934 /* Nonzero if OP is a floating point value with value 0.0. */
/* Predicate: OP is an SFmode floating-point constant equal to +0.0
   (minus zero is explicitly excluded).  */
3937 fp_zero_operand (op)
3942 if (GET_MODE (op) != SFmode)
3945 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
3946 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
3949 /* Nonzero if OP is a floating point value with value 1.0. */
/* Body of the value-1.0 test (per the comment above): OP must be an
   SFmode constant comparing equal to dconst1.  */
3957 if (GET_MODE (op) != SFmode)
3960 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
3961 return REAL_VALUES_EQUAL (r, dconst1);
/* Predicate: OP is a LABEL_REF whose label is immediately preceded by
   a braf-style jump — a JUMP_INSN whose pattern is a two-element
   PARALLEL whose first element is a SET of a PLUS that references OP
   itself.  */
3965 braf_label_ref_operand(op, mode)
3967 enum machine_mode mode;
3971 if (GET_CODE (op) != LABEL_REF)
3973 prev = prev_real_insn (XEXP (op, 0));
3974 if (GET_CODE (prev) != JUMP_INSN)
3976 prev = PATTERN (prev);
3977 if (GET_CODE (prev) != PARALLEL || XVECLEN (prev, 0) != 2)
3979 prev = XVECEXP (prev, 0, 0);
3980 if (GET_CODE (prev) != SET)
3982 prev = SET_SRC (prev);
3983 if (GET_CODE (prev) != PLUS || XEXP (prev, 1) != op)
3987 /* Return the destination address of a branch. */
/* Return the address (from insn_addresses) of the destination of
   branch insn BRANCH.  Handles both unconditional jumps and
   IF_THEN_ELSE conditional branches, whose label is the "then" arm.  */
3990 branch_dest (branch)
3993 rtx dest = SET_SRC (PATTERN (branch));
3996 if (GET_CODE (dest) == IF_THEN_ELSE)
3997 dest = XEXP (dest, 1);
3998 dest = XEXP (dest, 0);
3999 dest_uid = INSN_UID (dest);
4000 return insn_addresses[dest_uid];
4003 /* Return non-zero if REG is not used after INSN.
4004 We assume REG is a reload reg, and therefore does
4005 not live past labels. It may live past calls or jumps though. */
4007 reg_unused_after (reg, insn)
4014 /* If the reg is set by this instruction, then it is safe for our
4015 case. Disregard the case where this is a store to memory, since
4016 we are checking a register used in the store address. */
4017 set = single_set (insn);
4018 if (set && GET_CODE (SET_DEST (set)) != MEM
4019 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4022 while (insn = NEXT_INSN (insn))
4024 code = GET_CODE (insn);
4027 /* If this is a label that existed before reload, then the register
4028 if dead here. However, if this is a label added by reorg, then
4029 the register may still be live here. We can't tell the difference,
4030 so we just ignore labels completely. */
4031 if (code == CODE_LABEL)
4036 if (code == JUMP_INSN)
4039 /* If this is a sequence, we must handle them all at once.
4040 We could have for instance a call that sets the target register,
4041 and a insn in a delay slot that uses the register. In this case,
4042 we must return 0. */
4043 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4048 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
4050 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
4051 rtx set = single_set (this_insn);
4053 if (GET_CODE (this_insn) == CALL_INSN)
4055 else if (GET_CODE (this_insn) == JUMP_INSN)
4057 if (INSN_ANNULLED_BRANCH_P (this_insn))
4062 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
4064 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4066 if (GET_CODE (SET_DEST (set)) != MEM)
4072 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
4077 else if (code == JUMP_INSN)
4080 else if (GET_RTX_CLASS (code) == 'i')
4082 rtx set = single_set (insn);
4084 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
4086 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4087 return GET_CODE (SET_DEST (set)) != MEM;
4088 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
4092 if (code == CALL_INSN && call_used_regs[REGNO (reg)])