1 /* Output routines for GCC for Hitachi Super-H.
2 Copyright (C) 1993-1998 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
21 /* Contributed by Steve Chamberlain (sac@cygnus.com).
22 Improved by Jim Wilson (wilson@cygnus.com). */
31 #include "insn-flags.h"
34 #include "hard-reg-set.h"
36 #include "insn-attr.h"
38 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
40 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
41 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
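/* Worked example (annotation added here, not in the original source): for a
   DFmode value held in the register pair r4/r5, reg_names[REGNO + MSW] names
   the register holding the most significant word and reg_names[REGNO + LSW]
   the least significant one.  On a big-endian target MSW is 0 and LSW is 1,
   so the 'S' operand code prints "r4" and 'R' prints "r5"; on a little-endian
   target the two swap.  See the 'R' and 'S' cases in print_operand below.  */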
43 /* ??? The pragma interrupt support will not work for SH3. */
44 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
45 output code for the next function appropriate for an interrupt handler. */
48 /* This is set by the trap_exit attribute for functions. It specifies
49 a trap number to be used in a trapa instruction at function exit
50 (instead of an rte instruction). */
53 /* This is used by the sp_switch attribute for functions. It specifies
54 a variable holding the address of the stack the interrupt function
55 should switch to/from at entry/exit. */
58 /* This is set by #pragma trapa, and is similar to the above, except that
59 the compiler doesn't emit code to preserve all registers. */
60 static int pragma_trapa;
62 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
63 which has a separate set of low regs for User and Supervisor modes.
64 This should only be used for the lowest level of interrupts. Higher levels
65 of interrupts must save the registers in case they themselves are interrupted. */
67 int pragma_nosave_low_regs;
69 /* This is used for communication between SETUP_INCOMING_VARARGS and
70 sh_expand_prologue. */
71 int current_function_anonymous_args;
73 /* Global variables from toplev.c and final.c that are used within, but
74 not declared in any header file. */
75 extern char *version_string;
76 extern int *insn_addresses;
78 /* Global variables for machine-dependent things. */
80 /* Which cpu are we scheduling for. */
81 enum processor_type sh_cpu;
83 /* Saved operands from the last compare to use when we generate an scc
89 enum machine_mode sh_addr_diff_vec_mode;
91 /* Provides the class number of the smallest class containing reg number. */
94 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
96 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
97 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
98 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
99 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
100 GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
101 MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
102 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
103 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
104 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
105 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
106 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
107 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
111 char fp_reg_names[][5] =
113 "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
114 "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
116 "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
119 /* Provide reg_class from a letter such as appears in the machine description. */
122 enum reg_class reg_class_from_letter[] =
124 /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
125 /* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
126 /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS,
127 /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
128 /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
129 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
130 /* y */ FPUL_REGS, /* z */ R0_REGS
133 int assembler_dialect;
135 rtx get_fpscr_rtx ();
136 void emit_sf_insn ();
137 void emit_df_insn ();
139 static void split_branches PROTO ((rtx));
141 /* Print the operand address in x to the stream. */
144 print_operand_address (stream, x)
148 switch (GET_CODE (x))
152 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
157 rtx base = XEXP (x, 0);
158 rtx index = XEXP (x, 1);
160 switch (GET_CODE (index))
163 fprintf (stream, "@(%d,%s)", INTVAL (index),
164 reg_names[true_regnum (base)]);
170 int base_num = true_regnum (base);
171 int index_num = true_regnum (index);
173 fprintf (stream, "@(r0,%s)",
174 reg_names[MAX (base_num, index_num)]);
186 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
190 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
194 output_addr_const (stream, x);
199 /* Print operand x (an rtx) in assembler syntax to file stream
200 according to modifier code.
202 '.' print a .s if insn needs delay slot
203 ',' print LOCAL_LABEL_PREFIX
204 '@' print trap, rte or rts depending upon pragma interruptness
205 '#' output a nop if there is nothing to put in the delay slot
206 'O' print a constant without the #
207 'R' print the LSW of a dp value - changes if in little endian
208 'S' print the MSW of a dp value - changes if in little endian
209 'T' print the next word of a dp value - same as 'R' in big endian mode.
210 'o' output an operator. */
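/* Usage note (added annotation): these codes appear in output templates such
   as the "sts mach,%S0\n\tsts macl,%R0" string returned by output_movedouble
   below, where %S0 prints the register holding the most significant word of
   operand 0 and %R0 the register holding the least significant word.  */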
213 print_operand (stream, x, code)
222 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
223 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
226 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
230 int interrupt_handler;
232 if ((lookup_attribute
233 ("interrupt_handler",
234 DECL_MACHINE_ATTRIBUTES (current_function_decl)))
236 interrupt_handler = 1;
238 interrupt_handler = 0;
241 fprintf (stream, "trapa #%d", trap_exit);
242 else if (interrupt_handler)
243 fprintf (stream, "rte");
245 fprintf (stream, "rts");
249 /* Output a nop if there's nothing in the delay slot. */
250 if (dbr_sequence_length () == 0)
251 fprintf (stream, "\n\tnop");
254 output_addr_const (stream, x);
257 fputs (reg_names[REGNO (x) + LSW], (stream));
260 fputs (reg_names[REGNO (x) + MSW], (stream));
263 /* Next word of a double. */
264 switch (GET_CODE (x))
267 fputs (reg_names[REGNO (x) + 1], (stream));
270 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
271 && GET_CODE (XEXP (x, 0)) != POST_INC)
272 x = adj_offsettable_operand (x, 4);
273 print_operand_address (stream, XEXP (x, 0));
278 switch (GET_CODE (x))
280 case PLUS: fputs ("add", stream); break;
281 case MINUS: fputs ("sub", stream); break;
282 case MULT: fputs ("mul", stream); break;
283 case DIV: fputs ("div", stream); break;
287 switch (GET_CODE (x))
290 if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG
291 && GET_MODE_SIZE (GET_MODE (x)) > 4)
292 fprintf ((stream), "d%s", reg_names[REGNO (x)]+1);
294 fputs (reg_names[REGNO (x)], (stream));
297 output_address (XEXP (x, 0));
301 output_addr_const (stream, x);
308 static void force_into PROTO ((rtx, rtx));
310 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
312 force_into (value, target)
315 value = force_operand (value, target);
316 if (! rtx_equal_p (value, target))
317 emit_insn (gen_move_insn (target, value));
320 /* Emit code to perform a block move. Choose the best method.
322 OPERANDS[0] is the destination.
323 OPERANDS[1] is the source.
324 OPERANDS[2] is the size.
325 OPERANDS[3] is the alignment safe to use. */
328 expand_block_move (operands)
331 int align = INTVAL (operands[3]);
332 int constp = (GET_CODE (operands[2]) == CONST_INT);
333 int bytes = (constp ? INTVAL (operands[2]) : 0);
335 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
336 alignment, or if it isn't a multiple of 4 bytes, then fail. */
337 if (! constp || align < 4 || (bytes % 4 != 0))
344 else if (bytes == 12)
348 rtx r4 = gen_rtx (REG, SImode, 4);
349 rtx r5 = gen_rtx (REG, SImode, 5);
351 entry_name = get_identifier ("__movstrSI12_i4");
354 = copy_to_mode_reg (Pmode,
355 gen_rtx_SYMBOL_REF (Pmode,
356 IDENTIFIER_POINTER (entry_name)));
357 force_into (XEXP (operands[0], 0), r4);
358 force_into (XEXP (operands[1], 0), r5);
359 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
362 else if (! TARGET_SMALLCODE)
367 rtx r4 = gen_rtx (REG, SImode, 4);
368 rtx r5 = gen_rtx (REG, SImode, 5);
369 rtx r6 = gen_rtx (REG, SImode, 6);
371 entry_name = get_identifier (bytes & 4
373 : "__movstr_i4_even");
375 = copy_to_mode_reg (Pmode,
376 gen_rtx_SYMBOL_REF (Pmode,
377 IDENTIFIER_POINTER (entry_name)));
378 force_into (XEXP (operands[0], 0), r4);
379 force_into (XEXP (operands[1], 0), r5);
382 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
383 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
394 rtx r4 = gen_rtx (REG, SImode, 4);
395 rtx r5 = gen_rtx (REG, SImode, 5);
397 sprintf (entry, "__movstrSI%d", bytes);
398 entry_name = get_identifier (entry);
401 = copy_to_mode_reg (Pmode,
402 gen_rtx (SYMBOL_REF, Pmode,
403 IDENTIFIER_POINTER (entry_name)));
404 force_into (XEXP (operands[0], 0), r4);
405 force_into (XEXP (operands[1], 0), r5);
406 emit_insn (gen_block_move_real (func_addr_rtx));
410 /* This is the same number of bytes as a memcpy call, but to a different
411 less common function name, so this will occasionally use more space. */
412 if (! TARGET_SMALLCODE)
416 int final_switch, while_loop;
417 rtx r4 = gen_rtx (REG, SImode, 4);
418 rtx r5 = gen_rtx (REG, SImode, 5);
419 rtx r6 = gen_rtx (REG, SImode, 6);
421 entry_name = get_identifier ("__movstr");
423 = copy_to_mode_reg (Pmode,
424 gen_rtx (SYMBOL_REF, Pmode,
425 IDENTIFIER_POINTER (entry_name)));
426 force_into (XEXP (operands[0], 0), r4);
427 force_into (XEXP (operands[1], 0), r5);
429 /* r6 controls the size of the move. 16 is decremented from it
430 for each 64 bytes moved. Then the negative bit left over is used
431 as an index into a list of move instructions. e.g., a 72 byte move
432 would be set up with size(r6) = 14, for one iteration through the
433 big while loop, and a switch of -2 for the last part. */
435 final_switch = 16 - ((bytes / 4) % 16);
436 while_loop = ((bytes / 4) / 16 - 1) * 16;
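/* Worked example (annotation added for clarity): for a 72 byte move,
   bytes / 4 = 18 words, so final_switch = 16 - (18 % 16) = 14 and
   while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 14.  One pass of the
   64-byte loop leaves r6 = 14 - 16 = -2, which indexes the tail that moves
   the remaining 8 bytes, matching the example in the comment above.  */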
437 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
438 emit_insn (gen_block_lump_real (func_addr_rtx));
445 /* Prepare operands for a move define_expand; specifically, one of the
446 operands must be in a register. */
449 prepare_move_operands (operands, mode)
451 enum machine_mode mode;
453 if (! reload_in_progress && ! reload_completed)
455 /* Copy the source to a register if both operands aren't registers. */
456 if (! register_operand (operands[0], mode)
457 && ! register_operand (operands[1], mode))
458 operands[1] = copy_to_mode_reg (mode, operands[1]);
460 /* This case can happen while generating code to move the result
461 of a library call to the target. Reject `st r0,@(rX,rY)' because
462 reload will fail to find a spill register for rX, since r0 is already
463 being used for the source. */
464 else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
465 && GET_CODE (operands[0]) == MEM
466 && GET_CODE (XEXP (operands[0], 0)) == PLUS
467 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
468 operands[1] = copy_to_mode_reg (mode, operands[1]);
474 /* Prepare the operands for an scc instruction; make sure that the
475 compare has been done. */
477 prepare_scc_operands (code)
480 rtx t_reg = gen_rtx (REG, SImode, T_REG);
481 enum rtx_code oldcode = code;
482 enum machine_mode mode;
484 /* First need a compare insn. */
488 /* It isn't possible to handle this case. */
505 rtx tmp = sh_compare_op0;
506 sh_compare_op0 = sh_compare_op1;
507 sh_compare_op1 = tmp;
510 mode = GET_MODE (sh_compare_op0);
511 if (mode == VOIDmode)
512 mode = GET_MODE (sh_compare_op1);
514 sh_compare_op0 = force_reg (mode, sh_compare_op0);
515 if ((code != EQ && code != NE
516 && (sh_compare_op1 != const0_rtx
517 || code == GTU || code == GEU || code == LTU || code == LEU))
518 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
519 sh_compare_op1 = force_reg (mode, sh_compare_op1);
521 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
522 (mode == SFmode ? emit_sf_insn : emit_df_insn)
523 (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
524 gen_rtx (SET, VOIDmode, t_reg,
525 gen_rtx (code, SImode,
526 sh_compare_op0, sh_compare_op1)),
527 gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
529 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
530 gen_rtx (code, SImode, sh_compare_op0,
536 /* Called from the md file, set up the operands of a compare instruction. */
539 from_compare (operands, code)
543 enum machine_mode mode = GET_MODE (sh_compare_op0);
545 if (mode == VOIDmode)
546 mode = GET_MODE (sh_compare_op1);
549 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
551 /* Force args into regs, since we can't use constants here. */
552 sh_compare_op0 = force_reg (mode, sh_compare_op0);
553 if (sh_compare_op1 != const0_rtx
554 || code == GTU || code == GEU
555 || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
556 sh_compare_op1 = force_reg (mode, sh_compare_op1);
558 if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
560 from_compare (operands, GT);
561 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
564 insn = gen_rtx (SET, VOIDmode,
565 gen_rtx (REG, SImode, 18),
566 gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1));
567 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
569 insn = gen_rtx (PARALLEL, VOIDmode,
571 gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
572 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
578 /* Functions to output assembly code. */
580 /* Return a sequence of instructions to perform DI or DF move.
582 Since the SH cannot move a DI or DF in one instruction, we have
583 to take care when we see overlapping source and dest registers. */
586 output_movedouble (insn, operands, mode)
589 enum machine_mode mode;
591 rtx dst = operands[0];
592 rtx src = operands[1];
594 if (GET_CODE (dst) == MEM
595 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
596 return "mov.l %T1,%0\n\tmov.l %1,%0";
598 if (register_operand (dst, mode)
599 && register_operand (src, mode))
601 if (REGNO (src) == MACH_REG)
602 return "sts mach,%S0\n\tsts macl,%R0";
604 /* When mov.d r1,r2 do r2->r3 then r1->r2;
605 when mov.d r1,r0 do r1->r0 then r2->r1. */
607 if (REGNO (src) + 1 == REGNO (dst))
608 return "mov %T1,%T0\n\tmov %1,%0";
610 return "mov %1,%0\n\tmov %T1,%T0";
612 else if (GET_CODE (src) == CONST_INT)
614 if (INTVAL (src) < 0)
615 output_asm_insn ("mov #-1,%S0", operands);
617 output_asm_insn ("mov #0,%S0", operands);
621 else if (GET_CODE (src) == MEM)
624 int dreg = REGNO (dst);
625 rtx inside = XEXP (src, 0);
627 if (GET_CODE (inside) == REG)
628 ptrreg = REGNO (inside);
629 else if (GET_CODE (inside) == SUBREG)
630 ptrreg = REGNO (SUBREG_REG (inside)) + SUBREG_WORD (inside);
631 else if (GET_CODE (inside) == PLUS)
633 ptrreg = REGNO (XEXP (inside, 0));
634 /* ??? A r0+REG address shouldn't be possible here, because it isn't
635 an offsettable address. Unfortunately, offsettable addresses use
636 QImode to check the offset, and a QImode offsettable address
637 requires r0 for the other operand, which is not currently
638 supported, so we can't use the 'o' constraint.
639 Thus we must check for and handle r0+REG addresses here.
640 We punt for now, since this is likely very rare. */
641 if (GET_CODE (XEXP (inside, 1)) == REG)
644 else if (GET_CODE (inside) == LABEL_REF)
645 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
646 else if (GET_CODE (inside) == POST_INC)
647 return "mov.l %1,%0\n\tmov.l %1,%T0";
651 /* Work out the safe way to copy. Copy into the second half first. */
653 return "mov.l %T1,%T0\n\tmov.l %1,%0";
656 return "mov.l %1,%0\n\tmov.l %T1,%T0";
659 /* Print an instruction which would have gone into a delay slot after
660 another instruction, but couldn't because the other instruction expanded
661 into a sequence where putting the slot insn at the end wouldn't work. */
667 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
669 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
673 output_far_jump (insn, op)
677 struct { rtx lab, reg, op; } this;
680 int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)];
682 this.lab = gen_label_rtx ();
686 && offset - get_attr_length (insn) <= 32766)
689 jump = "mov.w %O0,%1;braf %1";
694 jump = "mov.l %O0,%1;jmp @%1";
696 /* If we have a scratch register available, use it. */
697 if (GET_CODE (PREV_INSN (insn)) == INSN
698 && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
700 this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
701 output_asm_insn (jump, &this.lab);
702 if (dbr_sequence_length ())
703 print_slot (final_sequence);
705 output_asm_insn ("nop", 0);
709 /* Output the delay slot insn first if any. */
710 if (dbr_sequence_length ())
711 print_slot (final_sequence);
713 this.reg = gen_rtx (REG, SImode, 13);
714 output_asm_insn ("mov.l r13,@-r15", 0);
715 output_asm_insn (jump, &this.lab);
716 output_asm_insn ("mov.l @r15+,r13", 0);
719 output_asm_insn (".align 2", 0);
720 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
722 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
726 /* Local label counter, used for constants in the pool and inside
731 /* Output code for ordinary branches. */
734 output_branch (logic, insn, operands)
739 switch (get_attr_length (insn))
742 /* This can happen if filling the delay slot has caused a forward
743 branch to exceed its range (we could reverse it, but only
744 when we know we won't overextend other branches; this should
745 best be handled by relaxation).
746 It can also happen when other condbranches hoist delay slot insn
747 from their destination, thus leading to code size increase.
748 But the branch will still be in the range -4092..+4098 bytes. */
753 /* The call to print_slot will clobber the operands. */
754 rtx op0 = operands[0];
756 /* If the instruction in the delay slot is annulled (true), then
757 there is no delay slot where we can put it now. The only safe
758 place for it is after the label. final will do that by default. */
761 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
763 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
764 ASSEMBLER_DIALECT ? "/" : ".", label);
765 print_slot (final_sequence);
768 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
770 output_asm_insn ("bra\t%l0", &op0);
771 fprintf (asm_out_file, "\tnop\n");
772 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LF", label);
776 /* When relaxing, handle this like a short branch. The linker
777 will fix it up if it still doesn't fit after relaxation. */
779 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
786 output_branchy_insn (code, template, insn, operands)
792 rtx next_insn = NEXT_INSN (insn);
795 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
797 rtx src = SET_SRC (PATTERN (next_insn));
798 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
800 /* Following branch not taken */
801 operands[9] = gen_label_rtx ();
802 emit_label_after (operands[9], next_insn);
807 int offset = (branch_dest (next_insn)
808 - insn_addresses[INSN_UID (next_insn)] + 4);
809 if (offset >= -252 && offset <= 258)
811 if (GET_CODE (src) == IF_THEN_ELSE)
819 operands[9] = gen_label_rtx ();
820 emit_label_after (operands[9], insn);
825 output_ieee_ccmpeq (insn, operands)
828 output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
831 /* Output to FILE the start of the assembler file. */
834 output_file_start (file)
839 output_file_directive (file, main_input_filename);
841 /* Switch to the data section so that the coffsem symbol and the
842 gcc2_compiled. symbol aren't in the text section. */
845 if (TARGET_LITTLE_ENDIAN)
846 fprintf (file, "\t.little\n");
849 /* Actual number of instructions used to make a shift by N. */
850 static char ashiftrt_insns[] =
851 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
853 /* Left shift and logical right shift are the same. */
854 static char shift_insns[] =
855 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
857 /* Individual shift amounts needed to get the above length sequences.
858 One bit right shifts clobber the T bit, so when possible, put one bit
859 shifts in the middle of the sequence, so the ends are eligible for
860 branch delay slots. */
861 static short shift_amounts[32][5] = {
862 {0}, {1}, {2}, {2, 1},
863 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
864 {8}, {8, 1}, {8, 2}, {8, 1, 2},
865 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
866 {16}, {16, 1}, {16, 2}, {16, 1, 2},
867 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
868 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
869 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
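/* Worked example (added annotation): a shift by 13 uses shift_amounts[13]
   = {8, 2, 1, 2}, four single instructions summing to 13, which agrees with
   shift_insns[13] == 4; the one-bit shift sits in the middle, so the first
   and last instructions remain candidates for branch delay slots.  */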
871 /* Likewise, but for shift amounts < 16, up to three highmost bits
872 might be clobbered. This is typically used when combined with some
873 kind of sign or zero extension. */
875 static char ext_shift_insns[] =
876 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
878 static short ext_shift_amounts[32][4] = {
879 {0}, {1}, {2}, {2, 1},
880 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
881 {8}, {8, 1}, {8, 2}, {8, 1, 2},
882 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
883 {16}, {16, 1}, {16, 2}, {16, 1, 2},
884 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
885 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
886 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
888 /* Assuming we have a value that has been sign-extended by at least one bit,
889 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
890 to shift it by N without data loss, and quicker than by other means? */
891 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
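/* Annotation (not in the original source): (n | 8) == 15 is a compact test
   for n == 7 or n == 15.  ext_shift_amounts[7] is {8, -1} and
   ext_shift_amounts[15] is {16, -1}, i.e. a left shift followed by a single
   one-bit right shift; that final shift can be made arithmetic without
   losing data once the value is already sign-extended by at least one bit.  */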
893 /* This is used in length attributes in sh.md to help compute the length
894 of arbitrary constant shift instructions. */
897 shift_insns_rtx (insn)
900 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
901 int shift_count = INTVAL (XEXP (set_src, 1));
902 enum rtx_code shift_code = GET_CODE (set_src);
907 return ashiftrt_insns[shift_count];
910 return shift_insns[shift_count];
916 /* Return the cost of a shift. */
922 int value = INTVAL (XEXP (x, 1));
924 /* If shifting by a non-constant amount, this will be expensive. */
925 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
926 return SH_DYNAMIC_SHIFT_COST;
928 /* Otherwise, return the true cost in instructions. */
929 if (GET_CODE (x) == ASHIFTRT)
931 int cost = ashiftrt_insns[value];
932 /* If SH3, then we put the constant in a reg and use shad. */
933 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
934 cost = 1 + SH_DYNAMIC_SHIFT_COST;
938 return shift_insns[value];
941 /* Return the cost of an AND operation. */
949 /* Anding with a register is a single cycle and instruction. */
950 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
953 i = INTVAL (XEXP (x, 1));
954 /* These constants are single cycle extu.[bw] instructions. */
955 if (i == 0xff || i == 0xffff)
957 /* Constants that can be used in an and immediate instruction take a single
958 cycle, but this requires r0, so make it a little more expensive. */
959 if (CONST_OK_FOR_L (i))
961 /* Constants that can be loaded with a mov immediate and an and.
962 This case is probably unnecessary. */
963 if (CONST_OK_FOR_I (i))
965 /* Any other constant requires a 2 cycle pc-relative load plus an and.
966 This case is probably unnecessary. */
970 /* Return the cost of a multiply. */
977 /* We have a mul insn, so we can never take more than the mul and the
978 read of the mac reg, but count more because of the latency and extra
980 if (TARGET_SMALLCODE)
985 /* If we're aiming at small code, then just count the number of
986 insns in a multiply call sequence. */
987 if (TARGET_SMALLCODE)
990 /* Otherwise count all the insns in the routine we'd be calling too. */
994 /* Code to expand a shift. */
997 gen_ashift (type, n, reg)
1002 /* Negative values here come from the shift_amounts array. */
1015 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1019 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1021 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1024 emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n)));
1029 /* Same for HImode */
1032 gen_ashift_hi (type, n, reg)
1037 /* Negative values here come from the shift_amounts array. */
1051 /* We don't have HImode right shift operations because using the
1052 ordinary 32 bit shift instructions for that doesn't generate proper
1053 zero/sign extension.
1054 gen_ashift_hi is only called in contexts where we know that the
1055 sign extension works out correctly. */
1058 if (GET_CODE (reg) == SUBREG)
1060 word = SUBREG_WORD (reg);
1061 reg = SUBREG_REG (reg);
1063 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word));
1067 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1072 /* Output RTL to split a constant shift into its component SH constant
1073 shift instructions. */
1076 gen_shifty_op (code, operands)
1080 int value = INTVAL (operands[2]);
1083 /* Truncate the shift count in case it is out of bounds. */
1084 value = value & 0x1f;
1088 if (code == LSHIFTRT)
1090 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1091 emit_insn (gen_movt (operands[0]));
1094 else if (code == ASHIFT)
1096 /* There is a two instruction sequence for 31 bit left shifts,
1097 but it requires r0. */
1098 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1100 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1101 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1106 else if (value == 0)
1108 /* This can happen when not optimizing. We must output something here
1109 to prevent the compiler from aborting in final.c after the try_split
1111 emit_insn (gen_nop ());
1115 max = shift_insns[value];
1116 for (i = 0; i < max; i++)
1117 gen_ashift (code, shift_amounts[value][i], operands[0]);
1120 /* Same as above, but optimized for values where the topmost bits don't
1124 gen_shifty_hi_op (code, operands)
1128 int value = INTVAL (operands[2]);
1132 /* This operation is used by and_shl for SImode values with a few
1133 high bits known to be cleared. */
1137 emit_insn (gen_nop ());
1141 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1144 max = ext_shift_insns[value];
1145 for (i = 0; i < max; i++)
1146 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1149 /* When shifting right, emit the shifts in reverse order, so that
1150 solitary negative values come first. */
1151 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1152 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1155 /* Output RTL for an arithmetic right shift. */
1157 /* ??? Rewrite to use super-optimizer sequences. */
1160 expand_ashiftrt (operands)
1170 if (GET_CODE (operands[2]) != CONST_INT)
1172 rtx count = copy_to_mode_reg (SImode, operands[2]);
1173 emit_insn (gen_negsi2 (count, count));
1174 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1177 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1178 > 1 + SH_DYNAMIC_SHIFT_COST)
1181 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
1182 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1186 if (GET_CODE (operands[2]) != CONST_INT)
1189 value = INTVAL (operands[2]) & 31;
1193 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
1196 else if (value >= 16 && value <= 19)
1198 wrk = gen_reg_rtx (SImode);
1199 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
1202 gen_ashift (ASHIFTRT, 1, wrk);
1203 emit_move_insn (operands[0], wrk);
1206 /* Expand a short sequence inline; for longer ones, call a magic routine. */
1207 else if (value <= 5)
1209 wrk = gen_reg_rtx (SImode);
1210 emit_move_insn (wrk, operands[1]);
1212 gen_ashift (ASHIFTRT, 1, wrk);
1213 emit_move_insn (operands[0], wrk);
1217 wrk = gen_reg_rtx (Pmode);
1219 /* Load the value into an arg reg and call a helper. */
1220 emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]);
1221 sprintf (func, "__ashiftrt_r4_%d", value);
1222 func_name = get_identifier (func);
1223 emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode,
1224 IDENTIFIER_POINTER (func_name)));
1225 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
1226 emit_move_insn (operands[0], gen_rtx (REG, SImode, 4));
1230 int sh_dynamicalize_shift_p (count)
1233 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
1236 /* Try to find a good way to implement the combiner pattern
1237 [(set (match_operand:SI 0 "register_operand" "r")
1238 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1239 (match_operand:SI 2 "const_int_operand" "n"))
1240 (match_operand:SI 3 "const_int_operand" "n"))) .
1241 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
1242 return 0 for simple right / left or left/right shift combination.
1243 return 1 for a combination of shifts with zero_extend.
1244 return 2 for a combination of shifts with an AND that needs r0.
1245 return 3 for a combination of shifts with an AND that needs an extra
1246 scratch register, when the three highmost bits of the AND mask are clear.
1247 return 4 for a combination of shifts with an AND that needs an extra
1248 scratch register, when any of the three highmost bits of the AND mask
1250 If ATTRP is set, store an initial right shift width in ATTRP[0],
1251 and the instruction length in ATTRP[1]. These values are not valid
1253 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
1254 shift_amounts for the last shift value that is to be used before the
1257 shl_and_kind (left_rtx, mask_rtx, attrp)
1258 rtx left_rtx, mask_rtx;
1261 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
1262 int left = INTVAL (left_rtx), right;
1264 int cost, best_cost = 10000;
1265 int best_right = 0, best_len = 0;
1269 if (left < 0 || left > 31)
1271 if (GET_CODE (mask_rtx) == CONST_INT)
1272 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
1274 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
1275 /* Can this be expressed as a right shift / left shift pair ? */
1276 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
1277 right = exact_log2 (lsb);
1278 mask2 = ~(mask + lsb - 1);
1279 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
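/* Worked example of the bit tricks above (added annotation): for
   mask = 0x38 (binary 111000), mask ^ (mask - 1) = 0x0f, so
   lsb = (0x0f >> 1) + 1 = 0x08, the lowest set bit, and
   right = exact_log2 (lsb) = 3.  Then mask + lsb - 1 = 0x3f and
   mask2 = ~0x3f, whose lowest set bit lsb2 = 0x40 marks the first zero bit
   above the lowest run of ones in mask.  */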
1280 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
1282 best_cost = shift_insns[right] + shift_insns[right + left];
1283 /* mask has no trailing zeroes <==> ! right */
1284 else if (! right && mask2 == ~(lsb2 - 1))
1286 int late_right = exact_log2 (lsb2);
1287 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
1289 /* Try to use zero extend */
1290 if (mask2 == ~(lsb2 - 1))
1294 for (width = 8; width <= 16; width += 8)
1296 /* Can we zero-extend right away? */
1297 if (lsb2 == (HOST_WIDE_INT)1 << width)
1300 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
1301 if (cost < best_cost)
1312 /* ??? Could try to put zero extend into initial right shift,
1313 or even shift a bit left before the right shift. */
1314 /* Determine value of first part of left shift, to get to the
1315 zero extend cut-off point. */
1316 first = width - exact_log2 (lsb2) + right;
1317 if (first >= 0 && right + left - first >= 0)
1319 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
1320 + ext_shift_insns[right + left - first];
1321 if (cost < best_cost)
1333 /* Try to use r0 AND pattern */
1334 for (i = 0; i <= 2; i++)
1338 if (! CONST_OK_FOR_L (mask >> i))
1340 cost = (i != 0) + 2 + ext_shift_insns[left + i];
1341 if (cost < best_cost)
1346 best_len = cost - 1;
1349 /* Try to use a scratch register to hold the AND operand. */
1350 can_ext = ((mask << left) & 0xe0000000) == 0;
1351 for (i = 0; i <= 2; i++)
1355 cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
1356 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
1357 if (cost < best_cost)
1362 best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
1368 attrp[0] = best_right;
1369 attrp[1] = best_len;
1374 /* This is used in length attributes of the unnamed instructions
1375 corresponding to shl_and_kind return values of 1 and 2. */
1377 shl_and_length (insn)
1380 rtx set_src, left_rtx, mask_rtx;
1383 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1384 left_rtx = XEXP (XEXP (set_src, 0), 1);
1385 mask_rtx = XEXP (set_src, 1);
1386 shl_and_kind (left_rtx, mask_rtx, attributes);
1387 return attributes[1];
1390 /* This is used in length attribute of the and_shl_scratch instruction. */
1393 shl_and_scr_length (insn)
1396 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1397 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
1398 rtx op = XEXP (set_src, 0);
1399 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
1400 op = XEXP (XEXP (op, 0), 0);
1401 return len + shift_insns[INTVAL (XEXP (op, 1))];
1404 /* Generating rtl? */
1405 extern int rtx_equal_function_value_matters;
1407 /* Generate rtl for instructions for which shl_and_kind advised a particular
1408 method of generating them, i.e. returned zero. */
1411 gen_shl_and (dest, left_rtx, mask_rtx, source)
1412 rtx dest, left_rtx, mask_rtx, source;
1415 unsigned HOST_WIDE_INT mask;
1416 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
1417 int right, total_shift;
1418 int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op;
1420 right = attributes[0];
1421 total_shift = INTVAL (left_rtx) + right;
1422 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
1429 int first = attributes[2];
1434 emit_insn ((mask << right) <= 0xff
1435 ? gen_zero_extendqisi2(dest,
1436 gen_lowpart (QImode, source))
1437 : gen_zero_extendhisi2(dest,
1438 gen_lowpart (HImode, source)));
1442 emit_insn (gen_movsi (dest, source));
1446 operands[2] = GEN_INT (right);
1447 gen_shifty_hi_op (LSHIFTRT, operands);
1451 operands[2] = GEN_INT (first);
1452 gen_shifty_hi_op (ASHIFT, operands);
1453 total_shift -= first;
1457 emit_insn (mask <= 0xff
1458 ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
1459 : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
1460 if (total_shift > 0)
1462 operands[2] = GEN_INT (total_shift);
1463 gen_shifty_hi_op (ASHIFT, operands);
1468 shift_gen_fun = gen_shifty_op;
1470 /* If the topmost bit that matters is set, set the topmost bits
1471 that don't matter. This way, we might be able to get a shorter
1473 if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift))
1474 mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
1476 /* Don't expand fine-grained when combining, because that will
1477 make the pattern fail. */
1478 if (rtx_equal_function_value_matters
1479 || reload_in_progress || reload_completed)
1483 /* Cases 3 and 4 should be handled by this split
1484 only while combining */
1489 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
1492 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
1497 operands[2] = GEN_INT (total_shift);
1498 shift_gen_fun (ASHIFT, operands);
1505 if (kind != 4 && total_shift < 16)
1507 neg = -ext_shift_amounts[total_shift][1];
1509 neg -= ext_shift_amounts[total_shift][2];
1513 emit_insn (gen_and_shl_scratch (dest, source,
1516 GEN_INT (total_shift + neg),
1518 emit_insn (gen_movsi (dest, dest));
1525 /* Try to find a good way to implement the combiner pattern
1526 [(set (match_operand:SI 0 "register_operand" "=r")
1527 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
1528 (match_operand:SI 2 "const_int_operand" "n")
1529 (match_operand:SI 3 "const_int_operand" "n")
1531 (clobber (reg:SI 18))]
1532 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
1533 return 0 for simple left / right shift combination.
1534 return 1 for left shift / 8 bit sign extend / left shift.
1535 return 2 for left shift / 16 bit sign extend / left shift.
1536 return 3 for left shift / 8 bit sign extend / shift / sign extend.
1537 return 4 for left shift / 16 bit sign extend / shift / sign extend.
1538 return 5 for left shift / 16 bit sign extend / right shift
1539 return 6 for < 8 bit sign extend / left shift.
1540 return 7 for < 8 bit sign extend / left shift / single right shift.
1541 If COSTP is nonzero, assign the calculated cost to *COSTP. */
1544 shl_sext_kind (left_rtx, size_rtx, costp)
1545 rtx left_rtx, size_rtx;
1548 int left, size, insize, ext;
1549 int cost, best_cost;
1552 left = INTVAL (left_rtx);
1553 size = INTVAL (size_rtx);
1554 insize = size - left;
1557 /* Default to left / right shift. */
1559 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
1562 /* 16 bit shift / sign extend / 16 bit shift */
1563 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
1564 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
1565 below, by alternative 3 or something even better. */
1566 if (cost < best_cost)
1572 /* Try a plain sign extend between two shifts. */
1573 for (ext = 16; ext >= insize; ext -= 8)
1577 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
1578 if (cost < best_cost)
1584 /* Check if we can do a sloppy shift with a final signed shift
1585 restoring the sign. */
1586 if (EXT_SHIFT_SIGNED (size - ext))
1587 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
1588 /* If not, maybe it's still cheaper to do the second shift sloppy,
1589 and do a final sign extend? */
1590 else if (size <= 16)
1591 cost = ext_shift_insns[ext - insize] + 1
1592 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
1595 if (cost < best_cost)
1597 kind = ext / 8U + 2;
1601 /* Check if we can sign extend in r0 */
1604 cost = 3 + shift_insns[left];
1605 if (cost < best_cost)
1610 /* Try the same with a final signed shift. */
1613 cost = 3 + ext_shift_insns[left + 1] + 1;
1614 if (cost < best_cost)
1623 /* Try to use a dynamic shift. */
1624 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
1625 if (cost < best_cost)
1636 /* Function to be used in the length attribute of the instructions
1637 implementing this pattern. */
1640 shl_sext_length (insn)
1643 rtx set_src, left_rtx, size_rtx;
1646 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1647 left_rtx = XEXP (XEXP (set_src, 0), 1);
1648 size_rtx = XEXP (set_src, 1);
1649 shl_sext_kind (left_rtx, size_rtx, &cost);
1653 /* Generate rtl for this pattern */
1656 gen_shl_sext (dest, left_rtx, size_rtx, source)
1657 rtx dest, left_rtx, size_rtx, source;
1660 int left, size, insize, cost;
1663 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
1664 left = INTVAL (left_rtx);
1665 size = INTVAL (size_rtx);
1666 insize = size - left;
1674 int ext = kind & 1 ? 8 : 16;
1675 int shift2 = size - ext;
1677 /* Don't expand fine-grained when combining, because that will
1678 make the pattern fail. */
1679 if (! rtx_equal_function_value_matters
1680 && ! reload_in_progress && ! reload_completed)
1682 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1683 emit_insn (gen_movsi (dest, source));
1687 emit_insn (gen_movsi (dest, source));
1691 operands[2] = GEN_INT (ext - insize);
1692 gen_shifty_hi_op (ASHIFT, operands);
1695 ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
1696 : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
1701 operands[2] = GEN_INT (shift2);
1702 gen_shifty_op (ASHIFT, operands);
1709 if (EXT_SHIFT_SIGNED (shift2))
1711 operands[2] = GEN_INT (shift2 + 1);
1712 gen_shifty_op (ASHIFT, operands);
1713 operands[2] = GEN_INT (1);
1714 gen_shifty_op (ASHIFTRT, operands);
1717 operands[2] = GEN_INT (shift2);
1718 gen_shifty_hi_op (ASHIFT, operands);
1722 operands[2] = GEN_INT (-shift2);
1723 gen_shifty_hi_op (LSHIFTRT, operands);
1725 emit_insn (size <= 8
1726 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
1727 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1734 if (! rtx_equal_function_value_matters
1735 && ! reload_in_progress && ! reload_completed)
1736 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1740 operands[2] = GEN_INT (16 - insize);
1741 gen_shifty_hi_op (ASHIFT, operands);
1742 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
1744 /* Don't use gen_ashrsi3 because it generates new pseudos. */
1746 gen_ashift (ASHIFTRT, 1, dest);
1751 /* Don't expand fine-grained when combining, because that will
1752 make the pattern fail. */
1753 if (! rtx_equal_function_value_matters
1754 && ! reload_in_progress && ! reload_completed)
1756 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
1757 emit_insn (gen_movsi (dest, source));
1760 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
1761 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
1762 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
1764 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
1765 gen_shifty_op (ASHIFT, operands);
1767 emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
1775 /* The SH cannot load a large constant into a register; constants have to
1776 come from a pc relative load. The reference of a pc relative load
1777 instruction must be less than 1k in front of the instruction. This
1778 means that we often have to dump a constant inside a function, and
1779 generate code to branch around it.
1781 It is important to minimize this, since the branches will slow things
1782 down and make things bigger.
1784 Worst case code looks like:
1802 We fix this by performing a scan before scheduling, which notices which
1803 instructions need to have their operands fetched from the constant table
1804 and builds the table.
1808 scan, find an instruction which needs a pcrel move. Look forward, find the
1809 last barrier which is within MAX_COUNT bytes of the requirement.
1810 If there isn't one, make one. Process all the instructions between
1811 the find and the barrier.
1813 In the above example, we can tell that L3 is within 1k of L1, so
1814 the first move can be shrunk from the 3 insn+constant sequence into
1815 just 1 insn, and the constant moved to L3 to make:
1826 Then the second move becomes the target for the shortening process. */
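/* Illustrative sketch of the transformation (added annotation; the concrete
   code below is an assumption about typical output, not taken verbatim from
   a compilation):

	mov.l	.L1,r1		! pc relative load of the constant
	...
	bra	.L2		! branch around the dumped constant
	nop
	.align	2
   .L1:	.long	0x12345678
   .L2:	...

   After the scan, a constant that is within range of an existing pool (L3 in
   the example described above) is moved there, so the branch around it
   disappears.  */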
1830 rtx value; /* Value in table. */
1831 rtx label; /* Label of value. */
1832 enum machine_mode mode; /* Mode of value. */
1835 /* The maximum number of constants that can fit into one pool, since
1836 the pc relative range is 0...1020 bytes and constants are at least 4 bytes long. */
1839 #define MAX_POOL_SIZE (1020/4)
1840 static pool_node pool_vector[MAX_POOL_SIZE];
1841 static int pool_size;
1843 /* ??? If we need a constant in HImode which is the truncated value of a
1844 constant we need in SImode, we could combine the two entries thus saving
1845 two bytes. Is this common enough to be worth the effort of implementing
1848 /* ??? This stuff should be done at the same time that we shorten branches.
1849 As it is now, we must assume that all branches are the maximum size, and
1850 this causes us to almost always output constant pools sooner than necessary. */
1853 /* Add a constant to the pool and return its label. */
1856 add_constant (x, mode, last_value)
1859 enum machine_mode mode;
1864 /* First see if we've already got it. */
1865 for (i = 0; i < pool_size; i++)
1867 if (x->code == pool_vector[i].value->code
1868 && mode == pool_vector[i].mode)
1870 if (x->code == CODE_LABEL)
1872 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
1875 if (rtx_equal_p (x, pool_vector[i].value))
1880 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
1882 lab = pool_vector[i].label;
1884 pool_vector[i].label = lab = gen_label_rtx ();
1891 /* Need a new one. */
1892 pool_vector[pool_size].value = x;
1893 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
1896 lab = gen_label_rtx ();
1897 pool_vector[pool_size].mode = mode;
1898 pool_vector[pool_size].label = lab;
1903 /* Output the literal table. */
1912 /* Do two passes, first time dump out the HI sized constants. */
1914 for (i = 0; i < pool_size; i++)
1916 pool_node *p = &pool_vector[i];
1918 if (p->mode == HImode)
1922 scan = emit_insn_after (gen_align_2 (), scan);
1925 scan = emit_label_after (p->label, scan);
1926 scan = emit_insn_after (gen_consttable_2 (p->value), scan);
1932 for (i = 0; i < pool_size; i++)
1934 pool_node *p = &pool_vector[i];
1945 scan = emit_label_after (gen_label_rtx (), scan);
1946 scan = emit_insn_after (gen_align_4 (), scan);
1949 scan = emit_label_after (p->label, scan);
1950 scan = emit_insn_after (gen_consttable_4 (p->value), scan);
1957 scan = emit_label_after (gen_label_rtx (), scan);
1958 scan = emit_insn_after (gen_align_4 (), scan);
1961 scan = emit_label_after (p->label, scan);
1962 scan = emit_insn_after (gen_consttable_8 (p->value), scan);
1970 scan = emit_insn_after (gen_consttable_end (), scan);
1971 scan = emit_barrier_after (scan);
1975 /* Return non-zero if constant would be an ok source for a
1976 mov.w instead of a mov.l. */
1982 return (GET_CODE (src) == CONST_INT
1983 && INTVAL (src) >= -32768
1984 && INTVAL (src) <= 32767);
1987 /* Non-zero if the insn is a move instruction which needs to be fixed. */
1989 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
1990 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
1991 need to fix it if the input value is CONST_OK_FOR_I. */
1997 if (GET_CODE (insn) == INSN)
1999 rtx pat = PATTERN (insn);
2000 if (GET_CODE (pat) == PARALLEL)
2001 pat = XVECEXP (pat, 0, 0);
2002 if (GET_CODE (pat) == SET
2003 /* We can load any 8 bit value if we don't care what the high
2004 order bits end up as. */
2005 && GET_MODE (SET_DEST (pat)) != QImode
2006 && CONSTANT_P (SET_SRC (pat))
2008 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2009 && (fp_zero_operand (SET_SRC (pat))
2010 || fp_one_operand (SET_SRC (pat)))
2011 && GET_CODE (SET_DEST (pat)) == REG
2012 && REGNO (SET_DEST (pat)) >= FIRST_FP_REG
2013 && REGNO (SET_DEST (pat)) <= LAST_FP_REG)
2014 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2015 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
2026 return (GET_CODE (insn) == INSN
2027 && GET_CODE (PATTERN (insn)) == SET
2028 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
2029 && XINT (SET_SRC (PATTERN (insn)), 1) == 1);
2032 /* Find the last barrier from insn FROM which is close enough to hold the
2033 constant pool. If we can't find one, then create one near the end of
2037 find_barrier (num_mova, mova, from)
2047 int leading_mova = num_mova;
2048 rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
2052 /* For HImode: range is 510, add 4 because pc counts from address of
2053 second instruction after this one, subtract 2 for the jump instruction
2054 that we may need to emit before the table, subtract 2 for the instruction
2055 that fills the jump delay slot (in very rare cases, reorg will take an
2056 instruction from after the constant pool or will leave the delay slot
2057 empty). This gives 510.
2058 For SImode: range is 1020, add 4 because pc counts from address of
2059 second instruction after this one, subtract 2 in case pc is 2 byte
2060 aligned, subtract 2 for the jump instruction that we may need to emit
2061 before the table, subtract 2 for the instruction that fills the jump
2062 delay slot. This gives 1018. */
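/* In other words (added annotation), the limits described above work out
   numerically to hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018.  */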
2064 /* The branch will always be shortened now that the reference address for
2065 forward branches is the successor address, thus we need no longer make
2066 adjustments to the [sh]i_limit for -O0. */
2071 while (from && count_si < si_limit && count_hi < hi_limit)
2073 int inc = get_attr_length (from);
2076 if (GET_CODE (from) == CODE_LABEL)
2079 new_align = 1 << label_to_alignment (from);
2080 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
2081 new_align = 1 << barrier_align (from);
2087 if (GET_CODE (from) == BARRIER)
2090 found_barrier = from;
2092 /* If we are at the end of the function, or in front of an alignment
2093 instruction, we need not insert an extra alignment. We prefer
2094 this kind of barrier. */
2095 if (barrier_align (from) > 2)
2096 good_barrier = from;
2099 if (broken_move (from))
2102 enum machine_mode mode;
2104 pat = PATTERN (from);
2105 if (GET_CODE (pat) == PARALLEL)
2106 pat = XVECEXP (pat, 0, 0);
2107 src = SET_SRC (pat);
2108 dst = SET_DEST (pat);
2109 mode = GET_MODE (dst);
2111 /* We must explicitly check the mode, because sometimes the
2112 front end will generate code to load unsigned constants into
2113 HImode targets without properly sign extending them. */
2115 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
2118 /* We put the short constants before the long constants, so
2119 we must count the length of short constants in the range
2120 for the long constants. */
2121 /* ??? This isn't optimal, but is easy to do. */
2126 while (si_align > 2 && found_si + si_align - 2 > count_si)
2128 if (found_si > count_si)
2129 count_si = found_si;
2130 found_si += GET_MODE_SIZE (mode);
2132 si_limit -= GET_MODE_SIZE (mode);
2142 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
2144 if (found_si > count_si)
2145 count_si = found_si;
2147 else if (GET_CODE (from) == JUMP_INSN
2148 && (GET_CODE (PATTERN (from)) == ADDR_VEC
2149 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
2153 if (barrier_align (next_real_insn (from)) == CACHE_LOG)
2155 /* We have just passed the barrier in front of the
2156 ADDR_DIFF_VEC, which is stored in found_barrier. Since
2157 the ADDR_DIFF_VEC is accessed as data, just like our pool
2158 constants, this is a good opportunity to accommodate what
2159 we have gathered so far.
2160 If we waited any longer, we could end up at a barrier in
2161 front of code, which gives worse cache usage for separated
2162 instruction / data caches. */
2163 good_barrier = found_barrier;
2168 rtx body = PATTERN (from);
2169 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
2175 if (new_align > si_align)
2177 si_limit -= count_si - 1 & new_align - si_align;
2178 si_align = new_align;
2180 count_si = count_si + new_align - 1 & -new_align;
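/* Note on the expressions above (added annotation): '&' binds more loosely
   than '+' and '-', so the statement parses as
   count_si = (count_si + new_align - 1) & -new_align,
   i.e. count_si is rounded up to the next multiple of new_align; the
   si_limit adjustment likewise reads (count_si - 1) & (new_align - si_align).  */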
2185 if (new_align > hi_align)
2187 hi_limit -= count_hi - 1 & new_align - hi_align;
2188 hi_align = new_align;
2190 count_hi = count_hi + new_align - 1 & -new_align;
2193 from = NEXT_INSN (from);
2199 /* Try as we might, the leading mova is out of range. Change
2200 it into a load (which will become a pcload) and retry. */
2201 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
2202 INSN_CODE (mova) = -1;
2203 return find_barrier (0, 0, mova);
2207 /* Insert the constant pool table before the mova instruction,
2208 to prevent the mova label reference from going out of range. */
2210 good_barrier = found_barrier = barrier_before_mova;
2215 if (good_barrier && next_real_insn (found_barrier))
2216 found_barrier = good_barrier;
2220 /* We didn't find a barrier in time to dump our stuff,
2221 so we'll make one. */
2222 rtx label = gen_label_rtx ();
2224 /* If we exceeded the range, then we must back up over the last
2225 instruction we looked at. Otherwise, we just need to undo the
2226 NEXT_INSN at the end of the loop. */
2227 if (count_hi > hi_limit || count_si > si_limit)
2228 from = PREV_INSN (PREV_INSN (from));
2230 from = PREV_INSN (from);
2232 /* Walk back to be just before any jump or label.
2233 Putting it before a label reduces the number of times the branch
2234 around the constant pool table will be hit. Putting it before
2235 a jump makes it more likely that the bra delay slot will be filled. */
2237 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
2238 || GET_CODE (from) == CODE_LABEL)
2239 from = PREV_INSN (from);
2241 from = emit_jump_insn_after (gen_jump (label), from);
2242 JUMP_LABEL (from) = label;
2243 LABEL_NUSES (label) = 1;
2244 found_barrier = emit_barrier_after (from);
2245 emit_label_after (label, found_barrier);
2248 return found_barrier;
2251 /* If the instruction INSN is implemented by a special function, and we can
2252 positively find the register that is used to call the sfunc, and this
2253 register is not used anywhere else in this instruction - except as the
2254 destination of a set, return this register; else, return 0. */
2256 sfunc_uses_reg (insn)
2260 rtx pattern, part, reg_part, reg;
2262 if (GET_CODE (insn) != INSN)
2264 pattern = PATTERN (insn);
2265 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
2268 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2270 part = XVECEXP (pattern, 0, i);
2271 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
2276 reg = XEXP (reg_part, 0);
2277 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
2279 part = XVECEXP (pattern, 0, i);
2280 if (part == reg_part || GET_CODE (part) == CLOBBER)
2282 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
2283 && GET_CODE (SET_DEST (part)) == REG)
2284 ? SET_SRC (part) : part)))
2290 /* See if the only way in which INSN uses REG is by calling it, or by
2291 setting it while calling it. Set *SET to a SET rtx if the register
2295 noncall_uses_reg (reg, insn, set)
2304 reg2 = sfunc_uses_reg (insn);
2305 if (reg2 && REGNO (reg2) == REGNO (reg))
2307 pattern = single_set (insn);
2309 && GET_CODE (SET_DEST (pattern)) == REG
2310 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2314 if (GET_CODE (insn) != CALL_INSN)
2316 /* We don't use rtx_equal_p because we don't care if the mode is
2318 pattern = single_set (insn);
2320 && GET_CODE (SET_DEST (pattern)) == REG
2321 && REGNO (reg) == REGNO (SET_DEST (pattern)))
2327 par = PATTERN (insn);
2328 if (GET_CODE (par) == PARALLEL)
2329 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
2331 part = XVECEXP (par, 0, i);
2332 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
2335 return reg_mentioned_p (reg, SET_SRC (pattern));
2341 pattern = PATTERN (insn);
2343 if (GET_CODE (pattern) == PARALLEL)
2347 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
2348 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
2350 pattern = XVECEXP (pattern, 0, 0);
2353 if (GET_CODE (pattern) == SET)
2355 if (reg_mentioned_p (reg, SET_DEST (pattern)))
2357 /* We don't use rtx_equal_p, because we don't care if the
2358 mode is different. */
2359 if (GET_CODE (SET_DEST (pattern)) != REG
2360 || REGNO (reg) != REGNO (SET_DEST (pattern)))
2366 pattern = SET_SRC (pattern);
2369 if (GET_CODE (pattern) != CALL
2370 || GET_CODE (XEXP (pattern, 0)) != MEM
2371 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
2377 /* Given X, a pattern of an insn or a part of it, return a mask of used
2378 general registers. Bits 0..15 mean that the respective registers
2379 are used as inputs in the instruction. Bits 16..31 mean that the
2380 registers 0..15, respectively, are used as outputs, or are clobbered.
2381 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
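/* Worked example (added annotation): for a SET of r1 from (plus r4 r5),
   assuming SImode operands that occupy one hard register each, regs_used
   returns (1 << 4) | (1 << 5) for the two inputs together with
   1 << (1 + 16) for the output, i.e. 0x20030.  */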
2383 regs_used (x, is_dest)
2392 code = GET_CODE (x);
2397 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2398 << (REGNO (x) + is_dest));
2402 rtx y = SUBREG_REG (x);
2404 if (GET_CODE (y) != REG)
2407 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
2408 << (REGNO (y) + SUBREG_WORD (x) + is_dest));
2412 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
2414 /* If there was a return value, it must have been indicated with USE. */
2427 fmt = GET_RTX_FORMAT (code);
2429 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
2434 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
2435 used |= regs_used (XVECEXP (x, i, j), is_dest);
2437 else if (fmt[i] == 'e')
2438 used |= regs_used (XEXP (x, i), is_dest);
2443 /* Create an instruction that prevents redirection of a conditional branch
2444 to the destination of the JUMP with address ADDR.
2445 If the branch needs to be implemented as an indirect jump, try to find
2446 a scratch register for it.
2447 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
2448 If any preceding insn that doesn't fit into a delay slot is good enough,
2449 pass 1. Pass 2 if a definite blocking insn is needed.
2450 -1 is used internally to avoid deep recursion.
2451 If a blocking instruction is made or recognized, return it. */
2454 gen_block_redirect (jump, addr, need_block)
2456 int addr, need_block;
2459 rtx prev = prev_nonnote_insn (jump);
2462 /* First, check if we already have an instruction that satisfies our need. */
2463 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
2465 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2467 if (GET_CODE (PATTERN (prev)) == USE
2468 || GET_CODE (PATTERN (prev)) == CLOBBER
2469 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2471 else if ((need_block &= ~1) < 0)
2473 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
2476 /* We can't use JUMP_LABEL here because it might be undefined
2477 when not optimizing. */
2478 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
2479 /* If the branch is out of range, try to find a scratch register for it. */
2481 && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098))
2484 /* Don't look for the stack pointer as a scratch register,
2485 it would cause trouble if an interrupt occurred. */
2486 unsigned try = 0x7fff, used;
2487 int jump_left = flag_expensive_optimizations + 1;
2489 /* It is likely that the most recent eligible instruction is wanted for
2490 the delay slot. Therefore, find out which registers it uses, and
2491 try to avoid using them. */
2493 for (scan = jump; scan = PREV_INSN (scan); )
2497 if (INSN_DELETED_P (scan))
2499 code = GET_CODE (scan);
2500 if (code == CODE_LABEL || code == JUMP_INSN)
2503 && GET_CODE (PATTERN (scan)) != USE
2504 && GET_CODE (PATTERN (scan)) != CLOBBER
2505 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
2507 try &= ~regs_used (PATTERN (scan), 0);
2511 for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); )
2515 if (INSN_DELETED_P (scan))
2517 code = GET_CODE (scan);
2518 if (GET_RTX_CLASS (code) == 'i')
2520 used |= regs_used (PATTERN (scan), 0);
2521 if (code == CALL_INSN)
2522 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
2523 dead |= (used >> 16) & ~used;
2529 if (code == JUMP_INSN)
2530 if (jump_left-- && simplejump_p (scan))
2531 scan = JUMP_LABEL (scan);
2536 /* Mask out the stack pointer again, in case it was
2537 the only 'free' register we have found. */
2540 /* If the immediate destination is still in range, check for possible
2541 threading with a jump beyond the delay slot insn.
2542 Don't check if we are called recursively; the jump has been or will be
2543 checked in a different invocation then. */
2545 else if (optimize && need_block >= 0)
2547 rtx next = next_active_insn (next_active_insn (dest));
2548 if (next && GET_CODE (next) == JUMP_INSN
2549 && GET_CODE (PATTERN (next)) == SET
2550 && recog_memoized (next) == CODE_FOR_jump)
2552 dest = JUMP_LABEL (next);
2554 && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)
2555 gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1);
2561 rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead));
2563 /* It would be nice if we could convert the jump into an indirect
2564 jump / far branch right now, thus exposing all constituent
2565 instructions to further optimization. However, reorg uses
2566 simplejump_p to determine if there is an unconditional jump where
2567 it should try to schedule instructions from the target of the
2568 branch; simplejump_p fails for indirect jumps even if they have
2569 a JUMP_LABEL.  */
2570 rtx insn = emit_insn_before (gen_indirect_jump_scratch
2571 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
2573 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
2576 else if (need_block)
2577 /* We can't use JUMP_LABEL here because it might be undefined
2578 when not optimizing. */
2579 return emit_insn_before (gen_block_branch_redirect
2580 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
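/* A note on the range tests above: an expression like

       insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098

   is the usual single-compare range check.  Writing d for the branch
   displacement, the unsigned comparison is true exactly when d < -4092
   or d > 4098, i.e. when the target is (roughly) beyond the reach of a
   pc-relative bra and an indirect jump through a scratch register will
   be needed.  For instance, d = -5000 gives -908, which viewed as an
   unsigned number exceeds 8190, so the branch counts as out of range.  */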
2585 #define CONDJUMP_MIN -252
2586 #define CONDJUMP_MAX 262
2589 /* A label (to be placed) in front of the jump
2590 that jumps to our ultimate destination. */
2592 /* Where we are going to insert it if we cannot move the jump any farther,
2593 or the jump itself if we have picked up an existing jump. */
2595 /* The ultimate destination. */
2597 struct far_branch *prev;
2598 /* If the branch has already been created, its address;
2599 else the address of its first prospective user. */
2603 enum mdep_reorg_phase_e mdep_reorg_phase;
2606 struct far_branch *bp;
2608 rtx insn = bp->insert_place;
2610 rtx label = gen_label_rtx ();
2612 emit_label_after (label, insn);
2615 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
2616 LABEL_NUSES (bp->far_label)++;
2619 jump = emit_jump_insn_after (gen_return (), insn);
2620 /* Emit a barrier so that reorg knows that any following instructions
2621 are not reachable via a fall-through path.
2622 But don't do this when not optimizing, since we wouldn't suppress the
2623 alignment for the barrier then, and could end up with out-of-range
2624 pc-relative loads. */
2626 emit_barrier_after (jump);
2627 emit_label_after (bp->near_label, insn);
2628 JUMP_LABEL (jump) = bp->far_label;
2629 if (! invert_jump (insn, label))
2631 /* Prevent reorg from undoing our splits. */
2632 gen_block_redirect (jump, bp->address += 2, 2);
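/* Schematically (made-up labels, delay slots left to reorg), an
   out-of-range conditional branch

       bt      .Lfar

   comes out of gen_far_branch as the inverted branch around an
   unconditional jump:

       bf      .Lskip
   .Lnear:
       bra     .Lfar
   .Lskip:

   where .Lnear is the near_label that other short branches to the same
   far destination can be redirected to, and the gen_block_redirect call
   above keeps reorg from threading the new jump away again.  */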
2635 /* Fix up ADDR_DIFF_VECs. */
2637 fixup_addr_diff_vecs (first)
2642 for (insn = first; insn; insn = NEXT_INSN (insn))
2644 rtx vec_lab, pat, prev, prevpat, x;
2646 if (GET_CODE (insn) != JUMP_INSN
2647 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
2649 pat = PATTERN (insn);
2650 vec_lab = XEXP (XEXP (pat, 0), 0);
2652 /* Search the matching casesi_jump_2. */
2653 for (prev = vec_lab; ; prev = PREV_INSN (prev))
2655 if (GET_CODE (prev) != JUMP_INSN)
2657 prevpat = PATTERN (prev);
2658 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
2660 x = XVECEXP (prevpat, 0, 1);
2661 if (GET_CODE (x) != USE)
2664 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
2667 /* Fix up the ADDR_DIFF_VEC to be relative
2668 to the reference address of the braf. */
2669 XEXP (XEXP (pat, 0), 0)
2670 = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
2674 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
2675 a barrier. Return the base 2 logarithm of the desired alignment. */
2677 barrier_align (barrier_or_label)
2678 rtx barrier_or_label;
2680 rtx next = next_real_insn (barrier_or_label), pat, prev;
2686 pat = PATTERN (next);
2688 if (GET_CODE (pat) == ADDR_DIFF_VEC)
2691 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1)
2692 /* This is a barrier in front of a constant table. */
2695 prev = prev_real_insn (barrier_or_label);
2696 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
2698 pat = PATTERN (prev);
2699 /* If this is a very small table, we want to keep the alignment after
2700 the table to the minimum for proper code alignment. */
2701 return ((TARGET_SMALLCODE
2702 || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
2703 <= 1 << (CACHE_LOG - 2)))
2707 if (TARGET_SMALLCODE)
2710 if (! TARGET_SH3 || ! optimize)
2713 /* When fixing up pcloads, a constant table might be inserted just before
2714 the basic block that ends with the barrier. Thus, we can't trust the
2715 instruction lengths before that. */
2716 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
2718 /* Check if there is an immediately preceding branch to the insn beyond
2719 the barrier. We must weigh the cost of discarding useful information
2720 from the current cache line when executing this branch and there is
2721 an alignment, against that of fetching unneeded insns in front of the
2722 branch target when there is no alignment. */
2724 /* PREV is presumed to be the JUMP_INSN for the barrier under
2725 investigation. Skip to the insn before it. */
2726 prev = prev_real_insn (prev);
2728 for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2;
2729 credit >= 0 && prev && GET_CODE (prev) == INSN;
2730 prev = prev_real_insn (prev))
2732 if (GET_CODE (PATTERN (prev)) == USE
2733 || GET_CODE (PATTERN (prev)) == CLOBBER)
2735 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
2736 prev = XVECEXP (PATTERN (prev), 0, 1);
2738 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
2740 credit -= get_attr_length (prev);
2743 && GET_CODE (prev) == JUMP_INSN
2744 && JUMP_LABEL (prev)
2745 && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label)
2746 && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0)))
2753 /* Exported to toplev.c.
2755 Do a final pass over the function, just before delayed branch
2756 scheduling.  */
2759 machine_dependent_reorg (first)
2764 rtx r0_rtx = gen_rtx (REG, Pmode, 0);
2765 rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx);
2767 /* If relaxing, generate pseudo-ops to associate function calls with
2768 the symbols they call. It does no harm to not generate these
2769 pseudo-ops. However, when we can generate them, it enables the
2770 linker to potentially relax the jsr to a bsr, and eliminate the
2771 register load and, possibly, the constant pool entry. */
2773 mdep_reorg_phase = SH_INSERT_USES_LABELS;
2776 /* Remove all REG_LABEL notes. We want to use them for our own
2777 purposes. This works because none of the remaining passes
2778 need to look at them.
2780 ??? But it may break in the future. We should use a machine
2781 dependent REG_NOTE, or some other approach entirely. */
2782 for (insn = first; insn; insn = NEXT_INSN (insn))
2784 if (GET_RTX_CLASS (GET_CODE (insn)) == 'i')
2788 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
2789 remove_note (insn, note);
2793 for (insn = first; insn; insn = NEXT_INSN (insn))
2795 rtx pattern, reg, link, set, scan, dies, label;
2796 int rescan = 0, foundinsn = 0;
2798 if (GET_CODE (insn) == CALL_INSN)
2800 pattern = PATTERN (insn);
2802 if (GET_CODE (pattern) == PARALLEL)
2803 pattern = XVECEXP (pattern, 0, 0);
2804 if (GET_CODE (pattern) == SET)
2805 pattern = SET_SRC (pattern);
2807 if (GET_CODE (pattern) != CALL
2808 || GET_CODE (XEXP (pattern, 0)) != MEM)
2811 reg = XEXP (XEXP (pattern, 0), 0);
2815 reg = sfunc_uses_reg (insn);
2820 if (GET_CODE (reg) != REG)
2823 /* This is a function call via REG. If the only uses of REG
2824 between the time that it is set and the time that it dies
2825 are in function calls, then we can associate all the
2826 function calls with the setting of REG. */
2828 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
2830 if (REG_NOTE_KIND (link) != 0)
2832 set = single_set (XEXP (link, 0));
2833 if (set && rtx_equal_p (reg, SET_DEST (set)))
2835 link = XEXP (link, 0);
2842 /* ??? Sometimes global register allocation will have
2843 deleted the insn pointed to by LOG_LINKS. Try
2844 scanning backward to find where the register is set. */
2845 for (scan = PREV_INSN (insn);
2846 scan && GET_CODE (scan) != CODE_LABEL;
2847 scan = PREV_INSN (scan))
2849 if (GET_RTX_CLASS (GET_CODE (scan)) != 'i')
2852 if (! reg_mentioned_p (reg, scan))
2855 if (noncall_uses_reg (reg, scan, &set))
2869 /* The register is set at LINK. */
2871 /* We can only optimize the function call if the register is
2872 being set to a symbol. In theory, we could sometimes
2873 optimize calls to a constant location, but the assembler
2874 and linker do not support that at present. */
2875 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
2876 && GET_CODE (SET_SRC (set)) != LABEL_REF)
2879 /* Scan forward from LINK to the place where REG dies, and
2880 make sure that the only insns which use REG are
2881 themselves function calls. */
2883 /* ??? This doesn't work for call targets that were allocated
2884 by reload, since there may not be a REG_DEAD note for the
2885 register.  */
2888 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
2892 /* Don't try to trace forward past a CODE_LABEL if we haven't
2893 seen INSN yet. Ordinarily, we will only find the setting insn
2894 in LOG_LINKS if it is in the same basic block. However,
2895 cross-jumping can insert code labels in between the load and
2896 the call, and can result in situations where a single call
2897 insn may have two targets depending on where we came from. */
2899 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
2902 if (GET_RTX_CLASS (GET_CODE (scan)) != 'i')
2905 /* Don't try to trace forward past a JUMP. To optimize
2906 safely, we would have to check that all the
2907 instructions at the jump destination did not use REG. */
2909 if (GET_CODE (scan) == JUMP_INSN)
2912 if (! reg_mentioned_p (reg, scan))
2915 if (noncall_uses_reg (reg, scan, &scanset))
2922 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
2924 /* There is a function call to this register other
2925 than the one we are checking. If we optimize
2926 this call, we need to rescan again below. */
2930 /* ??? We shouldn't have to worry about SCANSET here.
2931 We should just be able to check for a REG_DEAD note
2932 on a function call. However, the REG_DEAD notes are
2933 apparently not dependable around libcalls; c-torture
2934 execute/920501-2 is a test case. If SCANSET is set,
2935 then this insn sets the register, so it must have
2936 died earlier. Unfortunately, this will only handle
2937 the cases in which the register is, in fact, set in a
2938 later insn.  */
2940 /* ??? We shouldn't have to use FOUNDINSN here.
2941 However, the LOG_LINKS fields are apparently not
2942 entirely reliable around libcalls;
2943 newlib/libm/math/e_pow.c is a test case. Sometimes
2944 an insn will appear in LOG_LINKS even though it is
2945 not the most recent insn which sets the register. */
2949 || find_reg_note (scan, REG_DEAD, reg)))
2958 /* Either there was a branch, or some insn used REG
2959 other than as a function call address. */
2963 /* Create a code label, and put it in a REG_LABEL note on
2964 the insn which sets the register, and on each call insn
2965 which uses the register. In final_prescan_insn we look
2966 for the REG_LABEL notes, and output the appropriate label
2967 or pseudo-op.  */
2969 label = gen_label_rtx ();
2970 REG_NOTES (link) = gen_rtx (EXPR_LIST, REG_LABEL, label,
2972 REG_NOTES (insn) = gen_rtx (EXPR_LIST, REG_LABEL, label,
2981 scan = NEXT_INSN (scan);
2983 && ((GET_CODE (scan) == CALL_INSN
2984 && reg_mentioned_p (reg, scan))
2985 || ((reg2 = sfunc_uses_reg (scan))
2986 && REGNO (reg2) == REGNO (reg))))
2987 REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL,
2988 label, REG_NOTES (scan));
2990 while (scan != dies);
2996 fixup_addr_diff_vecs (first);
3000 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
3001 shorten_branches (first);
3003 /* Scan the function looking for move instructions which have to be
3004 changed to pc-relative loads and insert the literal tables. */
3006 mdep_reorg_phase = SH_FIXUP_PCLOAD;
3007 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
3014 else if (GET_CODE (insn) == JUMP_INSN
3015 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
3023 /* Some code might have been inserted between the mova and
3024 its ADDR_DIFF_VEC. Check if the mova is still in range. */
3025 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
3026 total += get_attr_length (scan);
3028 /* range of mova is 1020, add 4 because pc counts from address of
3029 second instruction after this one, subtract 2 in case pc is 2
3030 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
3031 cancels out with alignment effects of the mova itself. */
3034 /* Change the mova into a load, and restart scanning
3035 there. broken_move will then return true for mova. */
3036 SET_SRC (PATTERN (mova))
3037 = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3038 INSN_CODE (mova) = -1;
3042 if (broken_move (insn))
3045 /* Scan ahead looking for a barrier to stick the constant table
3046 behind.  */
3047 rtx barrier = find_barrier (num_mova, mova, insn);
3048 rtx last_float_move, last_float = 0, *last_float_addr;
3050 if (num_mova && ! mova_p (mova))
3052 /* find_barrier had to change the first mova into a
3053 pcload; thus, we have to start with this new pcload. */
3057 /* Now find all the moves between the points and modify them. */
3058 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
3060 if (GET_CODE (scan) == CODE_LABEL)
3062 if (broken_move (scan))
3064 rtx *patp = &PATTERN (scan), pat = *patp;
3069 enum machine_mode mode;
3071 if (GET_CODE (pat) == PARALLEL)
3072 patp = &XVECEXP (pat, 0, 0), pat = *patp;
3073 src = SET_SRC (pat);
3074 dst = SET_DEST (pat);
3075 mode = GET_MODE (dst);
3077 if (mode == SImode && hi_const (src)
3078 && REGNO (dst) != FPUL_REG)
3083 while (GET_CODE (dst) == SUBREG)
3085 offset += SUBREG_WORD (dst);
3086 dst = SUBREG_REG (dst);
3088 dst = gen_rtx (REG, HImode, REGNO (dst) + offset);
3091 if (GET_CODE (dst) == REG
3092 && ((REGNO (dst) >= FIRST_FP_REG
3093 && REGNO (dst) <= LAST_XD_REG)
3094 || REGNO (dst) == FPUL_REG))
3097 && reg_set_between_p (r0_rtx, last_float_move, scan))
3099 lab = add_constant (src, mode, last_float);
3101 emit_insn_before (gen_mova (lab), scan);
3103 *last_float_addr = r0_inc_rtx;
3104 last_float_move = scan;
3106 newsrc = gen_rtx (MEM, mode,
3107 ((TARGET_SH4 && ! TARGET_FMOVD
3108 || REGNO (dst) == FPUL_REG)
3111 last_float_addr = &XEXP (newsrc, 0);
3115 lab = add_constant (src, mode, 0);
3116 newsrc = gen_rtx (MEM, mode,
3117 gen_rtx (LABEL_REF, VOIDmode, lab));
3119 RTX_UNCHANGING_P (newsrc) = 1;
3120 *patp = gen_rtx (SET, VOIDmode, dst, newsrc);
3121 INSN_CODE (scan) = -1;
3124 dump_table (barrier);
3129 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
3131 split_branches (first);
3133 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
3134 also has an effect on the register that holds the address of the sfunc.
3135 Insert an extra dummy insn in front of each sfunc that pretends to
3136 use this register. */
3137 if (flag_delayed_branch)
3139 for (insn = first; insn; insn = NEXT_INSN (insn))
3141 rtx reg = sfunc_uses_reg (insn);
3145 emit_insn_before (gen_use_sfunc_addr (reg), insn);
3149 /* fpscr is not actually a user variable, but we pretend it is for the
3150 sake of the previous optimization passes, since we want it handled like
3151 one. However, we don't have any debugging information for it, so turn
3152 it into a non-user variable now. */
3154 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
3157 sh_flag_remove_dead_before_cse = 1;
3158 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
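/* For reference, the pcload fixup performed above turns a move of a
   constant that has no immediate encoding into a pc-relative load from
   a literal table dumped behind a suitable barrier, roughly (label and
   value made up):

       mov.l   .L42,r1         ! was a move of 0x12345678 into r1
       ...
       rts                     ! any insn followed by a barrier
       nop
       .align  2
   .L42:
       .long   0x12345678
   */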
3162 get_dest_uid (label, max_uid)
3166 rtx dest = next_real_insn (label);
3169 /* This can happen for an undefined label. */
3171 dest_uid = INSN_UID (dest);
3172 /* If this is a newly created branch redirection blocking instruction,
3173 we cannot index the branch_uid or insn_addresses arrays with its
3174 uid. But then, we won't need to, because the actual destination is
3175 the following branch. */
3176 while (dest_uid >= max_uid)
3178 dest = NEXT_INSN (dest);
3179 dest_uid = INSN_UID (dest);
3181 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
3186 /* Split condbranches that are out of range. Also add clobbers for
3187 scratch registers that are needed in far jumps.
3188 We do this before delay slot scheduling, so that it can take our
3189 newly created instructions into account. It also allows us to
3190 find branches with common targets more easily. */
3193 split_branches (first)
3197 struct far_branch **uid_branch, *far_branch_list = 0;
3198 int max_uid = get_max_uid ();
3200 /* Find out which branches are out of range. */
3201 shorten_branches (first);
3203 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
3204 bzero ((char *) uid_branch, max_uid * sizeof *uid_branch);
3206 for (insn = first; insn; insn = NEXT_INSN (insn))
3207 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
3209 else if (INSN_DELETED_P (insn))
3211 /* Shorten_branches would split this instruction again,
3212 so transform it into a note. */
3213 PUT_CODE (insn, NOTE);
3214 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
3215 NOTE_SOURCE_FILE (insn) = 0;
3217 else if (GET_CODE (insn) == JUMP_INSN
3218 /* Don't mess with ADDR_DIFF_VEC */
3219 && (GET_CODE (PATTERN (insn)) == SET
3220 || GET_CODE (PATTERN (insn)) == RETURN))
3222 enum attr_type type = get_attr_type (insn);
3223 if (type == TYPE_CBRANCH)
3227 if (get_attr_length (insn) > 4)
3229 rtx src = SET_SRC (PATTERN (insn));
3230 rtx cond = XEXP (src, 0);
3231 rtx olabel = XEXP (XEXP (src, 1), 0);
3233 int addr = insn_addresses[INSN_UID (insn)];
3235 int dest_uid = get_dest_uid (olabel, max_uid);
3236 struct far_branch *bp = uid_branch[dest_uid];
3238 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
3239 the label if the LABEL_NUSES count drops to zero. There is
3240 always a jump_optimize pass that sets these values, but it
3241 proceeds to delete unreferenced code, and then if not
3242 optimizing, to un-delete the deleted instructions, thus
3243 leaving labels with too low uses counts. */
3246 JUMP_LABEL (insn) = olabel;
3247 LABEL_NUSES (olabel)++;
3251 bp = (struct far_branch *) alloca (sizeof *bp);
3252 uid_branch[dest_uid] = bp;
3253 bp->prev = far_branch_list;
3254 far_branch_list = bp;
3256 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
3257 LABEL_NUSES (bp->far_label)++;
3261 label = bp->near_label;
3262 if (! label && bp->address - addr >= CONDJUMP_MIN)
3264 rtx block = bp->insert_place;
3266 if (GET_CODE (PATTERN (block)) == RETURN)
3267 block = PREV_INSN (block);
3269 block = gen_block_redirect (block,
3271 label = emit_label_after (gen_label_rtx (),
3273 bp->near_label = label;
3275 else if (label && ! NEXT_INSN (label))
3276 if (addr + 2 - bp->address <= CONDJUMP_MAX)
3277 bp->insert_place = insn;
3279 gen_far_branch (bp);
3282 || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)
3284 bp->near_label = label = gen_label_rtx ();
3285 bp->insert_place = insn;
3288 if (! redirect_jump (insn, label))
3293 /* get_attr_length (insn) == 2 */
3294 /* Check if we have a pattern where reorg wants to redirect
3295 the branch to a label from an unconditional branch that
3296 is too far away.  */
3297 /* We can't use JUMP_LABEL here because it might be undefined
3298 when not optimizing. */
3299 /* A syntax error might cause beyond to be NULL_RTX. */
3301 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
3305 && (GET_CODE (beyond) == JUMP_INSN
3306 || (GET_CODE (beyond = next_active_insn (beyond))
3308 && GET_CODE (PATTERN (beyond)) == SET
3309 && recog_memoized (beyond) == CODE_FOR_jump
3310 && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))]
3311 - insn_addresses[INSN_UID (insn)] + 252U)
3313 gen_block_redirect (beyond,
3314 insn_addresses[INSN_UID (beyond)], 1);
3317 next = next_active_insn (insn);
3319 if ((GET_CODE (next) == JUMP_INSN
3320 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
3321 && GET_CODE (PATTERN (next)) == SET
3322 && recog_memoized (next) == CODE_FOR_jump
3323 && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))]
3324 - insn_addresses[INSN_UID (insn)] + 252U)
3326 gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1);
3328 else if (type == TYPE_JUMP || type == TYPE_RETURN)
3330 int addr = insn_addresses[INSN_UID (insn)];
3333 struct far_branch *bp;
3335 if (type == TYPE_JUMP)
3337 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
3338 dest_uid = get_dest_uid (far_label, max_uid);
3341 /* Parse errors can lead to labels outside
3342 the insn stream.  */
3343 if (! NEXT_INSN (far_label))
3348 JUMP_LABEL (insn) = far_label;
3349 LABEL_NUSES (far_label)++;
3351 redirect_jump (insn, NULL_RTX);
3355 bp = uid_branch[dest_uid];
3358 bp = (struct far_branch *) alloca (sizeof *bp);
3359 uid_branch[dest_uid] = bp;
3360 bp->prev = far_branch_list;
3361 far_branch_list = bp;
3363 bp->far_label = far_label;
3365 LABEL_NUSES (far_label)++;
3367 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
3368 if (addr - bp->address <= CONDJUMP_MAX)
3369 emit_label_after (bp->near_label, PREV_INSN (insn));
3372 gen_far_branch (bp);
3378 bp->insert_place = insn;
3380 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
3382 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
3385 /* Generate all pending far branches,
3386 and free our references to the far labels. */
3387 while (far_branch_list)
3389 if (far_branch_list->near_label
3390 && ! NEXT_INSN (far_branch_list->near_label))
3391 gen_far_branch (far_branch_list);
3393 && far_branch_list->far_label
3394 && ! --LABEL_NUSES (far_branch_list->far_label))
3395 delete_insn (far_branch_list->far_label);
3396 far_branch_list = far_branch_list->prev;
3399 /* Instruction length information is no longer valid due to the new
3400 instructions that have been generated. */
3401 init_insn_lengths ();
3404 /* Dump out instruction addresses, which is useful for debugging the
3405 constant pool table stuff.
3407 If relaxing, output the label and pseudo-ops used to link together
3408 calls and the instruction which set the registers. */
3410 /* ??? This is unnecessary, and probably should be deleted. This makes
3411 the insn_addresses declaration above unnecessary. */
3413 /* ??? The addresses printed by this routine for insns are nonsense for
3414 insns which are inside of a sequence where none of the inner insns have
3415 variable length. This is because the second pass of shorten_branches
3416 does not bother to update them. */
3419 final_prescan_insn (insn, opvec, noperands)
3424 if (TARGET_DUMPISIZE)
3425 fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]);
3431 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
3436 pattern = PATTERN (insn);
3437 if (GET_CODE (pattern) == PARALLEL)
3438 pattern = XVECEXP (pattern, 0, 0);
3439 if (GET_CODE (pattern) == CALL
3440 || (GET_CODE (pattern) == SET
3441 && (GET_CODE (SET_SRC (pattern)) == CALL
3442 || get_attr_type (insn) == TYPE_SFUNC)))
3443 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
3444 CODE_LABEL_NUMBER (XEXP (note, 0)));
3445 else if (GET_CODE (pattern) == SET)
3446 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3447 CODE_LABEL_NUMBER (XEXP (note, 0)));
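/* When relaxing, the REG_LABEL bookkeeping set up in
   machine_dependent_reorg makes the output of this function look
   roughly like (label numbers invented):

   .L47:                       ! printed at the insn that sets the call register
       mov.l   .L52,r1
       ...
       .uses   .L47            ! printed just before the call itself
       jsr     @r1

   which tells the linker which load feeds which jsr, so that the pair
   can potentially be relaxed to a bsr.  */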
3454 /* Dump out any constants accumulated in the final pass. These will
3455 only be labels.  */
3458 output_jump_label_table ()
3464 fprintf (asm_out_file, "\t.align 2\n");
3465 for (i = 0; i < pool_size; i++)
3467 pool_node *p = &pool_vector[i];
3469 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
3470 CODE_LABEL_NUMBER (p->label));
3471 output_asm_insn (".long %O0", &p->value);
3479 /* A full frame looks like:
3483 [ if current_function_anonymous_args
3496 local-0 <- fp points here. */
3498 /* Number of bytes pushed for anonymous args, used to pass information
3499 between expand_prologue and expand_epilogue. */
3501 static int extra_push;
3503 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register
3504 to be adjusted, and TEMP, if nonnegative, holds the register number
3505 of a general register that we may clobber. */
3508 output_stack_adjust (size, reg, temp)
3515 if (CONST_OK_FOR_I (size))
3516 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
3517 /* Try to do it with two partial adjustments; however, we must make
3518 sure that the stack is properly aligned at all times, in case
3519 an interrupt occurs between the two partial adjustments. */
3520 else if (CONST_OK_FOR_I (size / 2 & -4)
3521 && CONST_OK_FOR_I (size - (size / 2 & -4)))
3523 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
3524 emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
3530 /* If TEMP is invalid, we could temporarily save a general
3531 register to MACL. However, there is currently no need
3532 to handle this case, so just abort when we see it. */
3535 const_reg = gen_rtx (REG, SImode, temp);
3537 /* If SIZE is negative, subtract the positive value.
3538 This sometimes allows a constant pool entry to be shared
3539 between prologue and epilogue code. */
3542 emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
3543 emit_insn (gen_subsi3 (reg, reg, const_reg));
3547 emit_insn (gen_movsi (const_reg, GEN_INT (size)));
3548 emit_insn (gen_addsi3 (reg, reg, const_reg));
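/* Example of the two partial adjustments above, with REG being the stack
   pointer: size = -184 does not fit the signed 8-bit range accepted by
   CONST_OK_FOR_I, but size / 2 & -4 is -92, so we emit

       add     #-92,r15
       add     #-92,r15

   Both halves satisfy CONST_OK_FOR_I and each is a multiple of four, so
   the stack stays aligned between the two instructions.  */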
3554 /* Output RTL to push register RN onto the stack. */
3562 x = gen_push_fpul ();
3563 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3564 && rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
3566 if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
3568 x = gen_push_4 (gen_rtx (REG, DFmode, rn));
3570 else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
3571 x = gen_push_e (gen_rtx (REG, SFmode, rn));
3573 x = gen_push (gen_rtx (REG, SImode, rn));
3576 REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
3577 gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
3580 /* Output RTL to pop register RN from the stack. */
3588 x = gen_pop_fpul ();
3589 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
3590 && rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
3592 if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
3594 x = gen_pop_4 (gen_rtx (REG, DFmode, rn));
3596 else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
3597 x = gen_pop_e (gen_rtx (REG, SFmode, rn));
3599 x = gen_pop (gen_rtx (REG, SImode, rn));
3602 REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
3603 gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
3606 /* Generate code to push the regs specified in the mask. */
3609 push_regs (mask, mask2)
3614 /* Push PR last; this gives better latencies after the prologue, and
3615 candidates for the return delay slot when there are no general
3616 registers pushed. */
3617 for (i = 0; i < 32; i++)
3618 if (mask & (1 << i) && i != PR_REG)
3620 for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
3621 if (mask2 & (1 << (i - 32)))
3623 if (mask & (1 << PR_REG))
3627 /* Work out the registers which need to be saved, both as a mask and a
3628 count of saved words.
3630 If doing a pragma interrupt function, then push all regs used by the
3631 function, and if we call another function (we can tell by looking at PR),
3632 make sure that all the regs it clobbers are safe too. */
3635 calc_live_regs (count_ptr, live_regs_mask2)
3637 int *live_regs_mask2;
3640 int live_regs_mask = 0;
3642 int interrupt_handler;
3644 if ((lookup_attribute
3645 ("interrupt_handler",
3646 DECL_MACHINE_ATTRIBUTES (current_function_decl)))
3648 interrupt_handler = 1;
3650 interrupt_handler = 0;
3652 *live_regs_mask2 = 0;
3653 /* If we can save a lot of saves by switching to double mode, do that. */
3654 if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
3655 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
3656 if (regs_ever_live[reg] && regs_ever_live[reg+1]
3657 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
3660 target_flags &= ~FPU_SINGLE_BIT;
3663 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
3665 if ((interrupt_handler && ! pragma_trapa)
3666 ? (/* Need to save all the regs ever live. */
3667 (regs_ever_live[reg]
3668 || (call_used_regs[reg]
3669 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
3670 && regs_ever_live[PR_REG]))
3671 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
3672 && reg != RETURN_ADDRESS_POINTER_REGNUM
3673 && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
3674 : (/* Only push those regs which are used and need to be saved. */
3675 regs_ever_live[reg] && ! call_used_regs[reg]))
3678 *live_regs_mask2 |= 1 << (reg - 32);
3680 live_regs_mask |= 1 << reg;
3682 if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG)
3683 if (reg <= LAST_FP_REG)
3685 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
3688 *live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
3690 live_regs_mask |= 1 << (reg ^ 1);
3694 else if (reg <= LAST_XD_REG)
3696 /* Must switch to double mode to access these registers. */
3697 target_flags &= ~FPU_SINGLE_BIT;
3704 return live_regs_mask;
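/* The two masks filled in above split the register file at 32:
   live_regs_mask has bit N set when register N (N < 32) must be saved,
   and *live_regs_mask2 holds bit N - 32 for the higher registers, so a
   live register 35, say, shows up as bit 3 of the second mask.  push_regs
   and sh_expand_epilogue below rely on the same convention.  */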
3707 /* Code to generate prologue and epilogue sequences */
3710 sh_expand_prologue ()
3714 int live_regs_mask2;
3715 int save_flags = target_flags;
3716 int double_align = 0;
3718 /* We have pretend args if we had an object sent partially in registers
3719 and partially on the stack, e.g. a large structure. */
3720 output_stack_adjust (-current_function_pretend_args_size,
3721 stack_pointer_rtx, 3);
3725 /* This is set by SETUP_INCOMING_VARARGS to indicate that this is a varargs
3726 routine. Clear it here so that the next function isn't affected. */
3727 if (current_function_anonymous_args)
3729 current_function_anonymous_args = 0;
3731 /* This is not used by the SH3E calling convention */
3732 if (! TARGET_SH3E && ! TARGET_HITACHI)
3734 /* Push arg regs as if they'd been provided by caller in stack. */
3735 for (i = 0; i < NPARM_REGS(SImode); i++)
3737 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
3738 if (i >= (NPARM_REGS(SImode)
3739 - current_function_args_info.arg_count[(int) SH_ARG_INT]
3748 /* If we're supposed to switch stacks at function entry, do so now. */
3750 emit_insn (gen_sp_switch_1 ());
3752 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
3753 /* ??? Maybe we could save some switching if we can move a mode switch
3754 that already happens to be at the function start into the prologue. */
3755 if (target_flags != save_flags)
3756 emit_insn (gen_toggle_sz ());
3757 push_regs (live_regs_mask, live_regs_mask2);
3758 if (target_flags != save_flags)
3759 emit_insn (gen_toggle_sz ());
3761 if (TARGET_ALIGN_DOUBLE && d & 1)
3764 target_flags = save_flags;
3766 output_stack_adjust (-get_frame_size () - double_align,
3767 stack_pointer_rtx, 3);
3769 if (frame_pointer_needed)
3770 emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
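/* As a sketch only (the exact registers come from the live-register
   masks), the prologue emitted for a small function that saves r14 and
   pr and needs 16 bytes of locals amounts to

       mov.l   r14,@-r15
       sts.l   pr,@-r15
       add     #-16,r15
       mov     r15,r14         ! only when a frame pointer is needed

   with the varargs pushes, stack switching and fpscr mode toggles
   wrapped around this as required.  */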
3774 sh_expand_epilogue ()
3779 int live_regs_mask2;
3780 int save_flags = target_flags;
3781 int frame_size = get_frame_size ();
3783 live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
3785 if (TARGET_ALIGN_DOUBLE && d & 1)
3788 if (frame_pointer_needed)
3790 output_stack_adjust (frame_size, frame_pointer_rtx, 7);
3792 /* We must avoid moving the stack pointer adjustment past code
3793 which reads from the local frame, else an interrupt could
3794 occur after the SP adjustment and clobber data in the local
3795 frame.  */
3796 emit_insn (gen_blockage ());
3797 emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
3799 else if (frame_size)
3801 /* We must avoid moving the stack pointer adjustment past code
3802 which reads from the local frame, else an interrupt could
3803 occur after the SP adjustment and clobber data in the local
3804 frame.  */
3805 emit_insn (gen_blockage ());
3806 output_stack_adjust (frame_size, stack_pointer_rtx, 7);
3809 /* Pop all the registers. */
3811 if (target_flags != save_flags)
3812 emit_insn (gen_toggle_sz ());
3813 if (live_regs_mask & (1 << PR_REG))
3815 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3817 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
3818 if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
3820 else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
3823 if (target_flags != save_flags)
3824 emit_insn (gen_toggle_sz ());
3825 target_flags = save_flags;
3827 output_stack_adjust (extra_push + current_function_pretend_args_size,
3828 stack_pointer_rtx, 7);
3830 /* Switch back to the normal stack if necessary. */
3832 emit_insn (gen_sp_switch_2 ());
3835 /* Clear variables at function end. */
3838 function_epilogue (stream, size)
3842 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
3843 sp_switch = NULL_RTX;
3847 sh_builtin_saveregs (arglist)
3850 tree fntype = TREE_TYPE (current_function_decl);
3851 /* First unnamed integer register. */
3852 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
3853 /* Number of integer registers we need to save. */
3854 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
3855 /* First unnamed SFmode float reg */
3856 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
3857 /* Number of SFmode float regs to save. */
3858 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
3859 int ptrsize = GET_MODE_SIZE (Pmode);
3860 rtx valist, regbuf, fpregs;
3863 /* Allocate block of memory for the regs. */
3864 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
3865 Or can assign_stack_local accept a 0 SIZE argument? */
3866 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
3868 regbuf = assign_stack_local (BLKmode, bufsize, 0);
3869 MEM_SET_IN_STRUCT_P (regbuf, 1);
3872 This is optimized to only save the regs that are necessary. Explicitly
3873 named args need not be saved. */
3875 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
3876 gen_rtx (MEM, BLKmode,
3877 plus_constant (XEXP (regbuf, 0),
3878 n_floatregs * UNITS_PER_WORD)),
3879 n_intregs, n_intregs * UNITS_PER_WORD);
3882 This is optimized to only save the regs that are necessary. Explicitly
3883 named args need not be saved.
3884 We explicitly build a pointer to the buffer because it halves the insn
3885 count when not optimizing (otherwise the pointer is built for each reg
3886 saved).
3887 We emit the moves in reverse order so that we can use predecrement. */
3889 fpregs = gen_reg_rtx (Pmode);
3890 emit_move_insn (fpregs, XEXP (regbuf, 0));
3891 emit_insn (gen_addsi3 (fpregs, fpregs,
3892 GEN_INT (n_floatregs * UNITS_PER_WORD)));
3895 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
3897 emit_insn (gen_addsi3 (fpregs, fpregs,
3898 GEN_INT (-2 * UNITS_PER_WORD)));
3899 emit_move_insn (gen_rtx (MEM, DFmode, fpregs),
3900 gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
3902 regno = first_floatreg;
3905 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
3906 emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
3907 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
3908 - (TARGET_LITTLE_ENDIAN != 0)));
3912 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
3914 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
3915 emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
3916 gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno));
3919 /* Return the address of the regbuf. */
3920 return XEXP (regbuf, 0);
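/* Layout of the save area built above: the unnamed float argument
   registers occupy the first n_floatregs words of regbuf (stored last to
   first through FPREGS), and the unnamed integer argument registers
   follow at offset n_floatregs * UNITS_PER_WORD.  The address returned
   is the start of regbuf, i.e. of the float area.  */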
3923 /* Define the offset between two registers, one to be eliminated, and
3924 the other its replacement, at the start of a routine. */
3927 initial_elimination_offset (from, to)
3932 int total_saved_regs_space;
3933 int total_auto_space = get_frame_size ();
3934 int save_flags = target_flags;
3936 int live_regs_mask, live_regs_mask2;
3937 live_regs_mask = calc_live_regs (®s_saved, &live_regs_mask2);
3938 if (TARGET_ALIGN_DOUBLE && regs_saved & 1)
3939 total_auto_space += 4;
3940 target_flags = save_flags;
3942 total_saved_regs_space = (regs_saved) * 4;
3944 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
3945 return total_saved_regs_space + total_auto_space;
3947 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3948 return total_saved_regs_space + total_auto_space;
3950 /* Initial gap between fp and sp is 0. */
3951 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3954 if (from == RETURN_ADDRESS_POINTER_REGNUM
3955 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
3957 int i, n = total_saved_regs_space;
3958 for (i = PR_REG-1; i >= 0; i--)
3959 if (live_regs_mask & (1 << i))
3961 return n + total_auto_space;
3967 /* Handle machine specific pragmas to be semi-compatible with Hitachi
3968 compiler.  */
3971 sh_handle_pragma (p_getc, p_ungetc, pname)
3972 int (* p_getc) PROTO((void));
3973 void (* p_ungetc) PROTO((int));
3978 if (strcmp (pname, "interrupt") == 0)
3979 pragma_interrupt = retval = 1;
3980 else if (strcmp (pname, "trapa") == 0)
3981 pragma_interrupt = pragma_trapa = retval = 1;
3982 else if (strcmp (pname, "nosave_low_regs") == 0)
3983 pragma_nosave_low_regs = retval = 1;
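/* For reference, source written for the Hitachi compiler uses these
   pragmas on a line of their own before the function they apply to; a
   minimal, made-up example:

       #pragma interrupt
       void
       irq_handler (void)
       {
         ...
       }

   #pragma trapa and #pragma nosave_low_regs are written the same way.  */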
3988 /* Generate the 'interrupt_handler' attribute for decls. */
3991 sh_pragma_insert_attributes (node, attributes, prefix)
3998 if (! pragma_interrupt
3999 || TREE_CODE (node) != FUNCTION_DECL)
4002 /* We are only interested in fields. */
4003 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
4006 /* Add an 'interrupt_handler' attribute. */
4007 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
4012 /* Return nonzero if ATTR is a valid attribute for DECL.
4013 ATTRIBUTES are any existing attributes and ARGS are the arguments
4014 supplied with ATTR.
4016 Supported attributes:
4018 interrupt_handler -- specifies this function is an interrupt handler.
4020 sp_switch -- specifies an alternate stack for an interrupt handler
4021 to run on.
4023 trap_exit -- use a trapa to exit an interrupt function instead of
4024 an rte instruction. */
4027 sh_valid_machine_decl_attribute (decl, attributes, attr, args)
4035 if (TREE_CODE (decl) != FUNCTION_DECL)
4038 if (is_attribute_p ("interrupt_handler", attr))
4043 if (is_attribute_p ("sp_switch", attr))
4045 /* The sp_switch attribute only has meaning for interrupt functions. */
4046 if (!pragma_interrupt)
4049 /* sp_switch must have an argument. */
4050 if (!args || TREE_CODE (args) != TREE_LIST)
4053 /* The argument must be a constant string. */
4054 if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
4057 sp_switch = gen_rtx (SYMBOL_REF, VOIDmode,
4058 TREE_STRING_POINTER (TREE_VALUE (args)));
4062 if (is_attribute_p ("trap_exit", attr))
4064 /* The trap_exit attribute only has meaning for interrupt functions. */
4065 if (!pragma_interrupt)
4068 /* trap_exit must have an argument. */
4069 if (!args || TREE_CODE (args) != TREE_LIST)
4072 /* The argument must be a constant integer. */
4073 if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
4076 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
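/* A made-up declaration exercising the three attributes validated above
   (the symbol name and the trap number are arbitrary):

       #pragma interrupt
       void handler (void)
         __attribute__ ((interrupt_handler,
                         sp_switch ("alt_stack"),
                         trap_exit (11)));

   Note that, as checked above, sp_switch and trap_exit are only accepted
   when pragma_interrupt is in effect.  */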
4082 /* Predicates used by the templates. */
4084 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
4085 Used only in general_movsrc_operand. */
4088 system_reg_operand (op, mode)
4090 enum machine_mode mode;
4102 /* Returns 1 if OP can be source of a simple move operation.
4103 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
4104 invalid as are subregs of system registers. */
4107 general_movsrc_operand (op, mode)
4109 enum machine_mode mode;
4111 if (GET_CODE (op) == MEM)
4113 rtx inside = XEXP (op, 0);
4114 if (GET_CODE (inside) == CONST)
4115 inside = XEXP (inside, 0);
4117 if (GET_CODE (inside) == LABEL_REF)
4120 if (GET_CODE (inside) == PLUS
4121 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
4122 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
4125 /* Only post inc allowed. */
4126 if (GET_CODE (inside) == PRE_DEC)
4130 if ((mode == QImode || mode == HImode)
4131 && (GET_CODE (op) == SUBREG
4132 && GET_CODE (XEXP (op, 0)) == REG
4133 && system_reg_operand (XEXP (op, 0), mode)))
4136 return general_operand (op, mode);
4139 /* Returns 1 if OP can be a destination of a move.
4140 Same as general_operand, but no preinc allowed. */
4143 general_movdst_operand (op, mode)
4145 enum machine_mode mode;
4147 /* Only pre dec allowed. */
4148 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
4151 return general_operand (op, mode);
4154 /* Returns 1 if OP is a normal arithmetic register. */
4157 arith_reg_operand (op, mode)
4159 enum machine_mode mode;
4161 if (register_operand (op, mode))
4165 if (GET_CODE (op) == REG)
4167 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4168 regno = REGNO (SUBREG_REG (op));
4172 return (regno != T_REG && regno != PR_REG
4173 && (regno != FPUL_REG || TARGET_SH4)
4174 && regno != MACH_REG && regno != MACL_REG);
4180 fp_arith_reg_operand (op, mode)
4182 enum machine_mode mode;
4184 if (register_operand (op, mode))
4188 if (GET_CODE (op) == REG)
4190 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4191 regno = REGNO (SUBREG_REG (op));
4195 return (regno >= FIRST_PSEUDO_REGISTER
4196 || (regno >= FIRST_FP_REG && regno <= LAST_FP_REG));
4202 fp_extended_operand (op, mode)
4204 enum machine_mode mode;
4206 if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode)
4209 mode = GET_MODE (op);
4211 if (register_operand (op, mode))
4215 if (GET_CODE (op) == REG)
4217 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
4218 regno = REGNO (SUBREG_REG (op));
4222 return (regno != T_REG && regno != PR_REG && regno > 15
4223 && regno != MACH_REG && regno != MACL_REG);
4228 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
4231 arith_operand (op, mode)
4233 enum machine_mode mode;
4235 if (arith_reg_operand (op, mode))
4238 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
4244 /* Returns 1 if OP is a valid source operand for a compare insn. */
4247 arith_reg_or_0_operand (op, mode)
4249 enum machine_mode mode;
4251 if (arith_reg_operand (op, mode))
4254 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
4260 /* Returns 1 if OP is a valid source operand for a logical operation. */
4263 logical_operand (op, mode)
4265 enum machine_mode mode;
4267 if (arith_reg_operand (op, mode))
4270 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
4276 /* Nonzero if OP is a floating point value with value 0.0. */
4279 fp_zero_operand (op)
4284 if (GET_MODE (op) != SFmode)
4287 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4288 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
4291 /* Nonzero if OP is a floating point value with value 1.0. */
4299 if (GET_MODE (op) != SFmode)
4302 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
4303 return REAL_VALUES_EQUAL (r, dconst1);
4307 braf_label_ref_operand(op, mode)
4309 enum machine_mode mode;
4313 if (GET_CODE (op) != LABEL_REF)
4315 prev = prev_real_insn (XEXP (op, 0));
4316 if (GET_CODE (prev) != JUMP_INSN)
4318 prev = PATTERN (prev);
4319 if (GET_CODE (prev) != PARALLEL || XVECLEN (prev, 0) != 2)
4321 prev = XVECEXP (prev, 0, 0);
4322 if (GET_CODE (prev) != SET)
4324 prev = SET_SRC (prev);
4325 if (GET_CODE (prev) != PLUS || XEXP (prev, 1) != op)
4330 tertiary_reload_operand (op, mode)
4332 enum machine_mode mode;
4334 enum rtx_code code = GET_CODE (op);
4335 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
4342 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
4343 && GET_MODE (op) == PSImode);
4347 commutative_float_operator (op, mode)
4349 enum machine_mode mode;
4351 if (GET_MODE (op) != mode)
4353 switch (GET_CODE (op))
4363 noncommutative_float_operator (op, mode)
4365 enum machine_mode mode;
4367 if (GET_MODE (op) != mode)
4369 switch (GET_CODE (op))
4379 binary_float_operator (op, mode)
4381 enum machine_mode mode;
4383 if (GET_MODE (op) != mode)
4385 switch (GET_CODE (op))
4396 /* Return the destination address of a branch. */
4399 branch_dest (branch)
4402 rtx dest = SET_SRC (PATTERN (branch));
4405 if (GET_CODE (dest) == IF_THEN_ELSE)
4406 dest = XEXP (dest, 1);
4407 dest = XEXP (dest, 0);
4408 dest_uid = INSN_UID (dest);
4409 return insn_addresses[dest_uid];
4412 /* Return non-zero if REG is not used after INSN.
4413 We assume REG is a reload reg, and therefore does
4414 not live past labels. It may live past calls or jumps though. */
4416 reg_unused_after (reg, insn)
4423 /* If the reg is set by this instruction, then it is safe for our
4424 case. Disregard the case where this is a store to memory, since
4425 we are checking a register used in the store address. */
4426 set = single_set (insn);
4427 if (set && GET_CODE (SET_DEST (set)) != MEM
4428 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4431 while (insn = NEXT_INSN (insn))
4433 code = GET_CODE (insn);
4436 /* If this is a label that existed before reload, then the register
4437 is dead here. However, if this is a label added by reorg, then
4438 the register may still be live here. We can't tell the difference,
4439 so we just ignore labels completely. */
4440 if (code == CODE_LABEL)
4445 if (code == JUMP_INSN)
4448 /* If this is a sequence, we must handle them all at once.
4449 We could have for instance a call that sets the target register,
4450 and an insn in a delay slot that uses the register. In this case,
4451 we must return 0. */
4452 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
4457 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
4459 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
4460 rtx set = single_set (this_insn);
4462 if (GET_CODE (this_insn) == CALL_INSN)
4464 else if (GET_CODE (this_insn) == JUMP_INSN)
4466 if (INSN_ANNULLED_BRANCH_P (this_insn))
4471 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
4473 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4475 if (GET_CODE (SET_DEST (set)) != MEM)
4481 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
4486 else if (code == JUMP_INSN)
4489 else if (GET_RTX_CLASS (code) == 'i')
4491 rtx set = single_set (insn);
4493 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
4495 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
4496 return GET_CODE (SET_DEST (set)) != MEM;
4497 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
4501 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
4507 extern struct obstack permanent_obstack;
4512 static rtx fpscr_rtx;
4516 push_obstacks (&permanent_obstack, &permanent_obstack);
4517 fpscr_rtx = gen_rtx (REG, PSImode, 48);
4518 REG_USERVAR_P (fpscr_rtx) = 1;
4520 mark_user_reg (fpscr_rtx);
4522 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
4523 mark_user_reg (fpscr_rtx);
4532 /* When generating reload insns, we must not create new registers. FPSCR
4533 should already have the correct value, so do nothing to change it. */
4534 if (! TARGET_FPU_SINGLE && ! reload_in_progress)
4536 addr = gen_reg_rtx (SImode);
4537 emit_insn (gen_fpu_switch0 (addr));
4540 if (! TARGET_FPU_SINGLE && ! reload_in_progress)
4542 addr = gen_reg_rtx (SImode);
4543 emit_insn (gen_fpu_switch1 (addr));
4552 if (TARGET_FPU_SINGLE && ! reload_in_progress)
4554 addr = gen_reg_rtx (SImode);
4555 emit_insn (gen_fpu_switch0 (addr));
4558 if (TARGET_FPU_SINGLE && ! reload_in_progress)
4560 addr = gen_reg_rtx (SImode);
4561 emit_insn (gen_fpu_switch1 (addr));
4566 expand_sf_unop (fun, operands)
4570 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
4574 expand_sf_binop (fun, operands)
4578 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
4583 expand_df_unop (fun, operands)
4587 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
4591 expand_df_binop (fun, operands)
4595 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
4600 expand_fp_branch (compare, branch)
4601 rtx (*compare) (), (*branch) ();
4603 (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn)
4605 emit_jump_insn ((*branch) ());
4608 /* We don't want to make fpscr call-saved, because that would prevent
4609 changing it, and it would also cost an extra instruction to save it.
4610 We don't want it to be known as a global register either, because
4611 that disables all flow analysis. But it has to be live at the function
4612 return. Thus, we need to insert a USE at the end of the function. */
4613 /* This should best be called at about the time FINALIZE_PIC is called,
4614 but not dependent on flag_pic. Alas, there is no suitable hook there,
4615 so this gets called from HAVE_RETURN. */
4619 static int fpscr_uses = 0;
4621 if (rtx_equal_function_value_matters)
4623 emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ()));
4630 /* Due to the crude way we emit the USEs, we might end up with
4631 some extra ones. Delete all but the last one. */
4634 for (insn = get_last_insn(); insn; insn = PREV_INSN (insn))
4635 if (GET_CODE (insn) == INSN
4636 && GET_CODE (PATTERN (insn)) == USE
4637 && GET_CODE (XEXP (PATTERN (insn), 0)) == REG
4638 && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
4640 insn = PREV_INSN (insn);
4643 for (; insn; insn = PREV_INSN (insn))
4644 if (GET_CODE (insn) == INSN
4645 && GET_CODE (PATTERN (insn)) == USE
4646 && GET_CODE (XEXP (PATTERN (insn), 0)) == REG
4647 && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
4649 PUT_CODE (insn, NOTE);
4650 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4651 NOTE_SOURCE_FILE (insn) = 0;
4658 /* ??? gcc does flow analysis strictly after common subexpression
4659 elimination. As a result, common subexpression elimination fails
4660 when there are some intervening statements setting the same register.
4661 If we did nothing about this, this would hurt the precision switching
4662 for SH4 badly. There is some cse after reload, but it is unable to
4663 undo the extra register pressure from the unused instructions, and
4664 it cannot remove auto-increment loads.
4666 A C code example that shows this flow/cse weakness for (at least) SH
4667 and sparc (as of gcc ss-970706) is this:
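   (A small function that mixes double precision arithmetic with control
   flow shows it; the following is an illustrative stand-in rather than
   the exact test case:)

   double
   f (double d)
   {
     double d2 = 4.0;

     if (d > 0.0)
       d2 = d2 + d;
     if (d > 1.0)
       d2 = d2 + d;
     return d2;
   }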
4681 So we add another pass before common subexpression elimination, to
4682 remove assignments that are dead due to a following assignment in the
4683 same basic block. */
4685 int sh_flag_remove_dead_before_cse;
4688 mark_use (x, reg_set_block)
4689 rtx x, *reg_set_block;
4695 code = GET_CODE (x);
4700 int regno = REGNO (x);
4701 int nregs = (regno < FIRST_PSEUDO_REGISTER
4702 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
4706 reg_set_block[regno + nregs - 1] = 0;
4713 rtx dest = SET_DEST (x);
4715 if (GET_CODE (dest) == SUBREG)
4716 dest = SUBREG_REG (dest);
4717 if (GET_CODE (dest) != REG)
4718 mark_use (dest, reg_set_block);
4719 mark_use (SET_SRC (x), reg_set_block);
4726 char *fmt = GET_RTX_FORMAT (code);
4728 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
4731 mark_use (XEXP (x, i), reg_set_block);
4732 else if (fmt[i] == 'E')
4733 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
4734 mark_use (XVECEXP (x, i, j), reg_set_block);
4742 remove_dead_before_cse ()
4744 rtx *reg_set_block, last, last_call, insn, set;
4747 /* This pass should run just once, after rtl generation. */
4749 if (! sh_flag_remove_dead_before_cse
4750 || rtx_equal_function_value_matters
4751 || reload_completed)
4754 sh_flag_remove_dead_before_cse = 0;
4756 reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx));
4757 bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx));
4758 last_call = last = get_last_insn ();
4759 for (insn = last; insn; insn = PREV_INSN (insn))
4761 if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
4763 if (GET_CODE (insn) == JUMP_INSN)
4765 last_call = last = insn;
4768 set = single_set (insn);
4770 /* Don't delete parts of libcalls, since that would confuse cse, loop
4771 and flow.  */
4772 if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
4774 else if (in_libcall)
4776 if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
4779 else if (set && GET_CODE (SET_DEST (set)) == REG)
4781 int regno = REGNO (SET_DEST (set));
4782 rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno]
4785 if (reg_set_block[regno] == ref_insn
4786 && (regno >= FIRST_PSEUDO_REGISTER
4787 || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1)
4788 && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn)))
4790 PUT_CODE (insn, NOTE);
4791 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4792 NOTE_SOURCE_FILE (insn) = 0;
4796 reg_set_block[REGNO (SET_DEST (set))] = ref_insn;
4798 if (GET_CODE (insn) == CALL_INSN)
4801 mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block);
4803 mark_use (PATTERN (insn), reg_set_block);