1 /* Output routines for GCC for Hitachi Super-H.
2 Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
4 This file is part of GNU CC.
6 GNU CC is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU CC is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU CC; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
20 /* Contributed by Steve Chamberlain (sac@cygnus.com).
21 Improved by Jim Wilson (wilson@cygnus.com). */
30 #include "insn-flags.h"
33 #include "hard-reg-set.h"
36 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
37 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
39 /* ??? The pragma interrupt support will not work for SH3. */
40 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
41 output code for the next function appropriate for an interrupt handler. */
44 /* This is set by #pragma trapa, and is similar to the above, except that
45 the compiler doesn't emit code to preserve all registers. */
46 static int pragma_trapa;
48 /* This is used for communication between SETUP_INCOMING_VARARGS and
49 sh_expand_prologue. */
50 int current_function_anonymous_args;
52 /* Global variables from toplev.c and final.c that are used within, but
53 not declared in any header file. */
54 extern char *version_string;
55 extern int *insn_addresses;
57 /* Global variables for machine-dependent things. */
59 /* Which cpu are we scheduling for. */
60 enum processor_type sh_cpu;
62 /* Saved operands from the last compare to use when we generate an scc
68 /* Provides the class number of the smallest class containing
71 int regno_reg_class[FIRST_PSEUDO_REGISTER] =
73 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
74 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
75 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
76 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
77 GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
81 /* Provide reg_class from a letter such as appears in the machine
84 enum reg_class reg_class_from_letter[] =
86 /* a */ NO_REGS, /* b */ NO_REGS, /* c */ NO_REGS, /* d */ NO_REGS,
87 /* e */ NO_REGS, /* f */ NO_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
88 /* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS,
89 /* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
90 /* q */ NO_REGS, /* r */ NO_REGS, /* s */ NO_REGS, /* t */ T_REGS,
91 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ NO_REGS, /* x */ MAC_REGS,
92 /* y */ NO_REGS, /* z */ R0_REGS
95 /* Print the operand address in x to the stream. */
98 print_operand_address (stream, x)
102 switch (GET_CODE (x))
105 fprintf (stream, "@%s", reg_names[REGNO (x)]);
110 rtx base = XEXP (x, 0);
111 rtx index = XEXP (x, 1);
113 switch (GET_CODE (index))
116 fprintf (stream, "@(%d,%s)", INTVAL (index),
117 reg_names[REGNO (base)]);
121 fprintf (stream, "@(r0,%s)",
122 reg_names[MAX (REGNO (base), REGNO (index))]);
133 fprintf (stream, "@-%s", reg_names[REGNO (XEXP (x, 0))]);
137 fprintf (stream, "@%s+", reg_names[REGNO (XEXP (x, 0))]);
141 output_addr_const (stream, x);
146 /* Print operand x (an rtx) in assembler syntax to file stream
147 according to modifier code.
149 '.' print a .s if insn needs delay slot
150 '@' print rte or rts depending upon pragma interruptness
151 '#' output a nop if there is nothing to put in the delay slot
152 'O' print a constant without the #
153 'R' print the LSW of a dp value - changes if in little endian
154 'S' print the MSW of a dp value - changes if in little endian
155 'T' print the next word of a dp value - same as 'R' in big endian mode. */
158 print_operand (stream, x, code)
167 fprintf (stream, ".s");
170 if (pragma_interrupt)
171 fprintf (stream, "rte");
173 fprintf (stream, "rts");
176 /* Output a nop if there's nothing in the delay slot. */
177 if (dbr_sequence_length () == 0)
178 fprintf (stream, "\n\tnop");
181 output_addr_const (stream, x);
184 fputs (reg_names[REGNO (x) + LSW], (stream));
187 fputs (reg_names[REGNO (x) + MSW], (stream));
190 /* Next word of a double. */
191 switch (GET_CODE (x))
194 fputs (reg_names[REGNO (x) + 1], (stream));
197 print_operand_address (stream,
198 XEXP (adj_offsettable_operand (x, 4), 0));
203 switch (GET_CODE (x))
206 fputs (reg_names[REGNO (x)], (stream));
209 output_address (XEXP (x, 0));
213 output_addr_const (stream, x);
220 /* Emit code to perform a block move. Choose the best method.
222 OPERANDS[0] is the destination.
223 OPERANDS[1] is the source.
224 OPERANDS[2] is the size.
225 OPERANDS[3] is the alignment safe to use. */
228 expand_block_move (operands)
231 int align = INTVAL (operands[3]);
232 int constp = (GET_CODE (operands[2]) == CONST_INT);
233 int bytes = (constp ? INTVAL (operands[2]) : 0);
235 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
236 alignment, or if it isn't a multiple of 4 bytes, then fail. */
237 if (! constp || align < 4 || (bytes % 4 != 0))
245 rtx r4 = gen_rtx (REG, SImode, 4);
246 rtx r5 = gen_rtx (REG, SImode, 5);
248 sprintf (entry, "__movstrSI%d", bytes);
249 entry_name = get_identifier (entry);
252 = copy_to_mode_reg (Pmode,
253 gen_rtx (SYMBOL_REF, Pmode,
254 IDENTIFIER_POINTER (entry_name)));
255 emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
256 emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
257 emit_insn (gen_block_move_real (func_addr_rtx));
261 /* This is the same number of bytes as a memcpy call, but to a different
262 less common function name, so this will occasionally use more space. */
263 if (! TARGET_SMALLCODE)
267 int final_switch, while_loop;
268 rtx r4 = gen_rtx (REG, SImode, 4);
269 rtx r5 = gen_rtx (REG, SImode, 5);
270 rtx r6 = gen_rtx (REG, SImode, 6);
272 entry_name = get_identifier ("__movstr");
274 = copy_to_mode_reg (Pmode,
275 gen_rtx (SYMBOL_REF, Pmode,
276 IDENTIFIER_POINTER (entry_name)));
277 emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
278 emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
280 /* r6 controls the size of the move. 16 is decremented from it
281 for each 64 bytes moved. Then the negative bit left over is used
282 as an index into a list of move instructions. e.g., a 72 byte move
283 would be set up with size(r6) = 14, for one iteration through the
284 big while loop, and a switch of -2 for the last part. */
286 final_switch = 16 - ((bytes / 4) % 16);
287 while_loop = ((bytes / 4) / 16 - 1) * 16;
288 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
289 emit_insn (gen_block_lump_real (func_addr_rtx));
296 /* Prepare operands for a move define_expand; specifically, one of the
297 operands must be in a register. */
300 prepare_move_operands (operands, mode)
302 enum machine_mode mode;
304 /* Copy the source to a register if both operands aren't registers. */
305 if (! reload_in_progress && ! reload_completed
306 && ! register_operand (operands[0], mode)
307 && ! register_operand (operands[1], mode))
308 operands[1] = copy_to_mode_reg (mode, operands[1]);
313 /* Prepare the operands for an scc instruction; make sure that the
314 compare has been done. */
316 prepare_scc_operands (code)
319 rtx t_reg = gen_rtx (REG, SImode, T_REG);
320 enum rtx_code oldcode = code;
322 /* First need a compare insn. */
326 /* It isn't possible to handle this case. */
343 rtx tmp = sh_compare_op0;
344 sh_compare_op0 = sh_compare_op1;
345 sh_compare_op1 = tmp;
348 sh_compare_op0 = force_reg (SImode, sh_compare_op0);
349 if (code != EQ && code != NE
350 && (sh_compare_op1 != const0_rtx
351 || code == GTU || code == GEU || code == LTU || code == LEU))
352 sh_compare_op1 = force_reg (SImode, sh_compare_op1);
354 emit_insn (gen_rtx (SET, VOIDmode, t_reg,
355 gen_rtx (code, SImode, sh_compare_op0,
361 /* Called from the md file, set up the operands of a compare instruction. */
364 from_compare (operands, code)
368 if (code != EQ && code != NE)
370 /* Force args into regs, since we can't use constants here. */
371 sh_compare_op0 = force_reg (SImode, sh_compare_op0);
372 if (sh_compare_op1 != const0_rtx
373 || code == GTU || code == GEU || code == LTU || code == LEU)
374 sh_compare_op1 = force_reg (SImode, sh_compare_op1);
376 operands[1] = sh_compare_op0;
377 operands[2] = sh_compare_op1;
380 /* Functions to output assembly code. */
382 /* Return a sequence of instructions to perform DI or DF move.
384 Since the SH cannot move a DI or DF in one instruction, we have
385 to take care when we see overlapping source and dest registers. */
388 output_movedouble (insn, operands, mode)
391 enum machine_mode mode;
393 rtx dst = operands[0];
394 rtx src = operands[1];
396 if (GET_CODE (dst) == MEM
397 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
398 return "mov.l %T1,%0\n\tmov.l %1,%0";
400 if (register_operand (dst, mode)
401 && register_operand (src, mode))
403 if (REGNO (src) == MACH_REG)
404 return "sts mach,%S0\n\tsts macl,%R0";
406 /* When mov.d r1,r2 do r2->r3 then r1->r2;
407 when mov.d r1,r0 do r1->r0 then r2->r1. */
409 if (REGNO (src) + 1 == REGNO (dst))
410 return "mov %T1,%T0\n\tmov %1,%0";
412 return "mov %1,%0\n\tmov %T1,%T0";
414 else if (GET_CODE (src) == CONST_INT)
416 if (INTVAL (src) < 0)
417 output_asm_insn ("mov #-1,%S0", operands);
419 output_asm_insn ("mov #0,%S0", operands);
423 else if (GET_CODE (src) == MEM)
426 int dreg = REGNO (dst);
427 rtx inside = XEXP (src, 0);
429 if (GET_CODE (inside) == REG)
430 ptrreg = REGNO (inside);
431 else if (GET_CODE (inside) == PLUS)
433 ptrreg = REGNO (XEXP (inside, 0));
434 /* ??? A r0+REG address shouldn't be possible here, because it isn't
435 an offsettable address. Unfortunately, offsettable addresses use
436 QImode to check the offset, and a QImode offsettable address
437 requires r0 for the other operand, which is not currently
438 supported, so we can't use the 'o' constraint.
439 Thus we must check for and handle r0+REG addresses here.
440 We punt for now, since this is likely very rare. */
441 if (GET_CODE (XEXP (inside, 1)) == REG)
444 else if (GET_CODE (inside) == LABEL_REF)
445 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
446 else if (GET_CODE (inside) == POST_INC)
447 return "mov.l %1,%0\n\tmov.l %1,%T0";
451 /* Work out the safe way to copy. Copy into the second half first. */
453 return "mov.l %T1,%T0\n\tmov.l %1,%0";
456 return "mov.l %1,%0\n\tmov.l %T1,%T0";
459 /* Print an instruction which would have gone into a delay slot after
460 another instruction, but couldn't because the other instruction expanded
461 into a sequence where putting the slot insn at the end wouldn't work. */
467 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1);
469 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
472 /* We can't tell if we need a register as a scratch for the jump
473 until after branch shortening, and then it's too late to allocate a
474 register the 'proper' way. These instruction sequences are rare
475 anyway, so to avoid always using a reg up from our limited set, we'll
476 grab one when we need one on output. */
478 /* ??? Should fix compiler so that using a clobber scratch in jump
479 instructions works, and then this will be unnecessary. */
482 output_far_jump (insn, op)
486 rtx thislab = gen_label_rtx ();
488 /* Output the delay slot insn first if any. */
489 if (dbr_sequence_length ())
490 print_slot (final_sequence);
492 output_asm_insn ("mov.l r13,@-r15", 0);
493 output_asm_insn ("mov.l %O0,r13", &thislab);
494 output_asm_insn ("jmp @r13", 0);
495 output_asm_insn ("mov.l @r15+,r13", 0);
496 output_asm_insn (".align 2", 0);
497 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (thislab));
498 output_asm_insn (".long %O0", &op);
502 /* Local label counter, used for constants in the pool and inside
507 /* Output code for ordinary branches. */
510 output_branch (logic, insn, operands)
517 switch (get_attr_length (insn))
520 /* A branch with an unfilled delay slot. */
522 /* Simple branch in range -252..+258 bytes */
523 return logic ? "bt%. %l0" : "bf%. %l0";
526 /* A branch with an unfilled delay slot. */
528 /* Branch in range -4092..+4098 bytes. */
530 /* The call to print_slot will clobber the operands. */
531 rtx op0 = operands[0];
535 fprintf (asm_out_file, "\tb%c.s\tLF%d\n", logic ? 'f' : 't',
537 print_slot (final_sequence);
540 fprintf (asm_out_file, "\tb%c\tLF%d\n", logic ? 'f' : 't', label);
542 output_asm_insn ("bra %l0", &op0);
543 fprintf (asm_out_file, "\tnop\n");
544 fprintf (asm_out_file, "LF%d:\n", label);
549 /* A branch with an unfilled delay slot. */
551 /* Branches a long way away. */
553 /* The call to print_slot will clobber the operands. */
554 rtx op0 = operands[0];
558 fprintf (asm_out_file, "\tb%c.s\tLF%d\n", logic ? 'f' : 't',
560 print_slot (final_sequence);
563 fprintf (asm_out_file, "\tb%c\tLF%d\n", logic ? 'f' : 't', label);
565 output_far_jump (insn, op0);
566 fprintf (asm_out_file, "LF%d:\n", label);
573 /* A copy of the option structure defined in toplev.c. */
/* Write one option, spelled TYPE immediately followed by NAME, to FILE
   without letting the current output line grow longer than MAX.

   POS is the number of characters already written on the current line.
   If SEP, TYPE and NAME together with POS do not exceed MAX, they are
   appended to the current line (SEP first) and the new line position is
   returned.  Otherwise INDENT is written to begin a continuation line,
   then TYPE and NAME; the value returned in that case is the number of
   characters printed for TYPE and NAME only (INDENT is not counted).  */

static int
output_option (file, sep, type, name, indent, pos, max)
     FILE *file;
     char *sep;
     char *type;
     char *name;
     char *indent;
     int pos;
     int max;
{
  if (strlen (sep) + strlen (type) + strlen (name) + pos > max)
    {
      /* INDENT is data, not a format string: write it verbatim so that a
	 '%' inside it can never be taken as a conversion specification.  */
      fputs (indent, file);
      return fprintf (file, "%s%s", type, name);
    }

  return pos + fprintf (file, "%s%s%s", sep, type, name);
}
603 /* A copy of the target_switches variable in toplev.c. */
609 } m_options[] = TARGET_SWITCHES;
611 /* Output all options to the assembly language file. */
614 output_options (file, f_options, f_len, W_options, W_len,
615 pos, max, sep, indent, term)
617 struct option *f_options;
618 struct option *W_options;
629 pos = output_option (file, sep, "-O", "", indent, pos, max);
630 if (write_symbols != NO_DEBUG)
631 pos = output_option (file, sep, "-g", "", indent, pos, max);
633 pos = output_option (file, sep, "-p", "", indent, pos, max);
634 if (profile_block_flag)
635 pos = output_option (file, sep, "-a", "", indent, pos, max);
637 for (j = 0; j < f_len; j++)
638 if (*f_options[j].variable == f_options[j].on_value)
639 pos = output_option (file, sep, "-f", f_options[j].string,
642 for (j = 0; j < W_len; j++)
643 if (*W_options[j].variable == W_options[j].on_value)
644 pos = output_option (file, sep, "-W", W_options[j].string,
647 for (j = 0; j < sizeof m_options / sizeof m_options[0]; j++)
648 if (m_options[j].name[0] != '\0'
649 && m_options[j].value > 0
650 && ((m_options[j].value & target_flags)
651 == m_options[j].value))
652 pos = output_option (file, sep, "-m", m_options[j].name,
655 fprintf (file, term);
658 /* Output to FILE the start of the assembler file. */
661 output_file_start (file, f_options, f_len, W_options, W_len)
663 struct option *f_options;
664 struct option *W_options;
669 output_file_directive (file, main_input_filename);
671 /* Switch to the data section so that the coffsem symbol and the
672 gcc2_compiled. symbol aren't in the text section. */
675 pos = fprintf (file, "\n! Hitachi SH cc1 (%s) arguments:", version_string);
676 output_options (file, f_options, f_len, W_options, W_len,
677 pos, 75, " ", "\n! ", "\n\n");
679 if (TARGET_LITTLE_ENDIAN)
680 fprintf (file, "\t.little\n");
/* ashiftrt_insns[N] is the actual number of instructions used to perform
   an arithmetic right shift by the constant amount N, for N in 0..31.  */
static char ashiftrt_insns[] =
{
  /*  0 ..  7 */ 0, 1, 2, 3, 4, 5, 8, 8,
  /*  8 .. 15 */ 8, 8, 8, 8, 8, 8, 8, 8,
  /* 16 .. 23 */ 2, 3, 4, 5, 8, 8, 8, 8,
  /* 24 .. 31 */ 8, 8, 8, 8, 8, 8, 8, 2
};
/* shift_insns[N] is the number of instructions used for a shift by the
   constant amount N, for N in 0..31.  One table serves both left shifts
   and logical right shifts, since the two need the same counts.  */
static char shift_insns[] =
{
  /*  0 ..  7 */ 0, 1, 1, 2, 2, 3, 3, 4,
  /*  8 .. 15 */ 1, 2, 2, 3, 3, 4, 3, 3,
  /* 16 .. 23 */ 1, 2, 2, 3, 3, 4, 3, 3,
  /* 24 .. 31 */ 2, 3, 3, 4, 4, 4, 3, 3
};
/* shift_amounts[N] lists the individual shift amounts which, applied in
   order, make up a shift by N using the number of instructions recorded
   in the tables above; unused trailing slots are zero.  The entries of
   each row add up to N; a negative entry presumably stands for a shift in
   the opposite direction (confirm against gen_ashift).

   One bit right shifts clobber the T bit, so wherever possible the one
   bit shifts are placed in the middle of a sequence, leaving the first
   and last instructions eligible for branch delay slots.  */
static short shift_amounts[32][5] =
{
  /*  0 */ {0},
  /*  1 */ {1},
  /*  2 */ {2},
  /*  3 */ {2, 1},
  /*  4 */ {2, 2},
  /*  5 */ {2, 1, 2},
  /*  6 */ {2, 2, 2},
  /*  7 */ {2, 2, 1, 2},
  /*  8 */ {8},
  /*  9 */ {8, 1},
  /* 10 */ {8, 2},
  /* 11 */ {8, 1, 2},
  /* 12 */ {8, 2, 2},
  /* 13 */ {8, 2, 1, 2},
  /* 14 */ {8, -2, 8},
  /* 15 */ {8, -1, 8},
  /* 16 */ {16},
  /* 17 */ {16, 1},
  /* 18 */ {16, 2},
  /* 19 */ {16, 1, 2},
  /* 20 */ {16, 2, 2},
  /* 21 */ {16, 2, 1, 2},
  /* 22 */ {16, -2, 8},
  /* 23 */ {16, -1, 8},
  /* 24 */ {16, 8},
  /* 25 */ {16, 1, 8},
  /* 26 */ {16, 8, 2},
  /* 27 */ {16, 8, 1, 2},
  /* 28 */ {16, 8, 2, 2},
  /* 29 */ {16, -1, -2, 16},
  /* 30 */ {16, -2, 16},
  /* 31 */ {16, -1, 16}
};
705 /* This is used in length attributes in sh.md to help compute the length
706 of arbitrary constant shift instructions. */
709 shift_insns_rtx (insn)
712 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
713 int shift_count = INTVAL (XEXP (set_src, 1));
714 enum rtx_code shift_code = GET_CODE (set_src);
719 return ashiftrt_insns[shift_count];
722 return shift_insns[shift_count];
728 /* Return the cost of a shift. */
734 int value = INTVAL (XEXP (x, 1));
736 /* If shift by a non constant, then this will be expensive. */
737 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
741 /* If not an sh3 then we don't even have an instruction for it. */
745 /* Otherwise, return the true cost in instructions. */
746 if (GET_CODE (x) == ASHIFTRT)
747 return ashiftrt_insns[value];
749 return shift_insns[value];
752 /* Return the cost of an AND operation. */
760 /* Anding with a register is a single cycle and instruction. */
761 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
764 i = INTVAL (XEXP (x, 1));
765 /* These constants are single cycle extu.[bw] instructions. */
766 if (i == 0xff || i == 0xffff)
/* A constant that can be used in an and-immediate instruction takes a single
cycle, but this requires r0, so make it a little more expensive. */
770 if (CONST_OK_FOR_L (i))
772 /* Constants that can be loaded with a mov immediate and an and.
773 This case is probably unnecessary. */
774 if (CONST_OK_FOR_I (i))
/* Any other constant requires a 2 cycle pc-relative load plus an and.
777 This case is probably unnecessary. */
781 /* Return the cost of a multiply. */
788 /* We have a mul insn, so we can never take more than the mul and the
789 read of the mac reg, but count more because of the latency and extra
791 if (TARGET_SMALLCODE)
796 /* If we're aiming at small code, then just count the number of
797 insns in a multiply call sequence. */
798 if (TARGET_SMALLCODE)
801 /* Otherwise count all the insns in the routine we'd be calling too. */
805 /* Code to expand a shift. */
808 gen_ashift (type, n, reg)
813 /* Negative values here come from the shift_amounts array. */
826 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
830 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
832 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
835 emit_insn (gen_ashlsi3_k (reg, reg, GEN_INT (n)));
840 /* Output RTL to split a constant shift into its component SH constant
841 shift instructions. */
843 /* ??? For SH3, should reject constant shifts when slower than loading the
844 shift count into a register? */
847 gen_shifty_op (code, operands)
851 int value = INTVAL (operands[2]);
856 if (code == LSHIFTRT)
858 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
859 emit_insn (gen_movt (operands[0]));
862 else if (code == ASHIFT)
864 /* There is a two instruction sequence for 31 bit left shifts,
865 but it requires r0. */
866 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
868 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
869 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
875 max = shift_insns[value];
876 for (i = 0; i < max; i++)
877 gen_ashift (code, shift_amounts[value][i], operands[0]);
880 /* Output RTL for an arithmetic right shift. */
882 /* ??? Rewrite to use super-optimizer sequences. */
885 expand_ashiftrt (operands)
893 if (TARGET_SH3 && GET_CODE (operands[2]) != CONST_INT)
895 rtx count = copy_to_mode_reg (SImode, operands[2]);
896 emit_insn (gen_negsi2 (count, count));
897 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
900 if (GET_CODE (operands[2]) != CONST_INT)
903 value = INTVAL (operands[2]);
907 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
910 else if (value >= 16 && value <= 19)
912 wrk = gen_reg_rtx (SImode);
913 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
916 gen_ashift (ASHIFTRT, 1, wrk);
917 emit_move_insn (operands[0], wrk);
920 /* Expand a short sequence inline, longer call a magic routine. */
923 wrk = gen_reg_rtx (SImode);
924 emit_move_insn (wrk, operands[1]);
926 gen_ashift (ASHIFTRT, 1, wrk);
927 emit_move_insn (operands[0], wrk);
931 wrk = gen_reg_rtx (Pmode);
933 /* Load the value into an arg reg and call a helper. */
934 emit_move_insn (gen_rtx (REG, SImode, 4), operands[1]);
935 sprintf (func, "__ashiftrt_r4_%d", value);
936 func_name = get_identifier (func);
937 emit_move_insn (wrk, gen_rtx (SYMBOL_REF, Pmode,
938 IDENTIFIER_POINTER (func_name)));
939 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
940 emit_move_insn (operands[0], gen_rtx (REG, SImode, 4));
944 /* The SH cannot load a large constant into a register, constants have to
945 come from a pc relative load. The reference of a pc relative load
instruction must be less than 1k in front of the instruction. This
947 means that we often have to dump a constant inside a function, and
948 generate code to branch around it.
950 It is important to minimize this, since the branches will slow things
951 down and make things bigger.
953 Worst case code looks like:
971 We fix this by performing a scan before scheduling, which notices which
972 instructions need to have their operands fetched from the constant table
973 and builds the table.
977 scan, find an instruction which needs a pcrel move. Look forward, find the
978 last barrier which is within MAX_COUNT bytes of the requirement.
979 If there isn't one, make one. Process all the instructions between
980 the find and the barrier.
982 In the above example, we can tell that L3 is within 1k of L1, so
983 the first move can be shrunk from the 3 insn+constant sequence into
984 just 1 insn, and the constant moved to L3 to make:
995 Then the second move becomes the target for the shortening process. */
999 rtx value; /* Value in table. */
1000 rtx label; /* Label of value. */
1001 enum machine_mode mode; /* Mode of value. */
1004 /* The maximum number of constants that can fit into one pool, since
1005 the pc relative range is 0...1020 bytes and constants are at least 4
1008 #define MAX_POOL_SIZE (1020/4)
1009 static pool_node pool_vector[MAX_POOL_SIZE];
1010 static int pool_size;
1012 /* ??? If we need a constant in HImode which is the truncated value of a
1013 constant we need in SImode, we could combine the two entries thus saving
1014 two bytes. Is this common enough to be worth the effort of implementing
1017 /* ??? This stuff should be done at the same time that we shorten branches.
1018 As it is now, we must assume that all branches are the maximum size, and
1019 this causes us to almost always output constant pools sooner than
1022 /* Add a constant to the pool and return its label. */
1025 add_constant (x, mode)
1027 enum machine_mode mode;
1032 /* First see if we've already got it. */
1033 for (i = 0; i < pool_size; i++)
1035 if (x->code == pool_vector[i].value->code
1036 && mode == pool_vector[i].mode)
1038 if (x->code == CODE_LABEL)
1040 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
1043 if (rtx_equal_p (x, pool_vector[i].value))
1044 return pool_vector[i].label;
1048 /* Need a new one. */
1049 pool_vector[pool_size].value = x;
1050 lab = gen_label_rtx ();
1051 pool_vector[pool_size].mode = mode;
1052 pool_vector[pool_size].label = lab;
1057 /* Output the literal table. */
1066 /* Do two passes, first time dump out the HI sized constants. */
1068 for (i = 0; i < pool_size; i++)
1070 pool_node *p = &pool_vector[i];
1072 if (p->mode == HImode)
1076 scan = emit_insn_after (gen_align_2 (), scan);
1079 scan = emit_label_after (p->label, scan);
1080 scan = emit_insn_after (gen_consttable_2 (p->value), scan);
1086 for (i = 0; i < pool_size; i++)
1088 pool_node *p = &pool_vector[i];
1098 scan = emit_label_after (gen_label_rtx (), scan);
1099 scan = emit_insn_after (gen_align_4 (), scan);
1101 scan = emit_label_after (p->label, scan);
1102 scan = emit_insn_after (gen_consttable_4 (p->value), scan);
1108 scan = emit_label_after (gen_label_rtx (), scan);
1109 scan = emit_insn_after (gen_align_4 (), scan);
1111 scan = emit_label_after (p->label, scan);
1112 scan = emit_insn_after (gen_consttable_8 (p->value), scan);
1120 scan = emit_insn_after (gen_consttable_end (), scan);
1121 scan = emit_barrier_after (scan);
1125 /* Return non-zero if constant would be an ok source for a
1126 mov.w instead of a mov.l. */
1132 return (GET_CODE (src) == CONST_INT
1133 && INTVAL (src) >= -32768
1134 && INTVAL (src) <= 32767);
1137 /* Non-zero if the insn is a move instruction which needs to be fixed. */
1139 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
1140 CONST_DOUBLE input value is CONST_OK_FOR_I. For a SFmode move, we don't
1141 need to fix it if the input value is CONST_OK_FOR_I. */
1147 if (GET_CODE (insn) == INSN
1148 && GET_CODE (PATTERN (insn)) == SET
1149 /* We can load any 8 bit value if we don't care what the high
1150 order bits end up as. */
1151 && GET_MODE (SET_DEST (PATTERN (insn))) != QImode
1152 && CONSTANT_P (SET_SRC (PATTERN (insn)))
1153 && (GET_CODE (SET_SRC (PATTERN (insn))) != CONST_INT
1154 || ! CONST_OK_FOR_I (INTVAL (SET_SRC (PATTERN (insn))))))
1160 /* Find the last barrier from insn FROM which is close enough to hold the
1161 constant pool. If we can't find one, then create one near the end of
1164 /* ??? It would be good to put constant pool tables between a case jump and
1165 the jump table. This fails for two reasons. First, there is no
1166 barrier after the case jump. This is a bug in the casesi pattern.
1167 Second, inserting the table here may break the mova instruction that
1168 loads the jump table address, by moving the jump table too far away.
1169 We fix that problem by never outputting the constant pool between a mova
1180 rtx found_barrier = 0;
1183 /* For HImode: range is 510, add 4 because pc counts from address of
1184 second instruction after this one, subtract 2 for the jump instruction
1185 that we may need to emit before the table. This gives 512.
1186 For SImode: range is 1020, add 4 because pc counts from address of
1187 second instruction after this one, subtract 2 in case pc is 2 byte
1188 aligned, subtract 2 for the jump instruction that we may need to emit
1189 before the table. This gives 1020. */
1190 while (from && count_si < 1020 && count_hi < 512)
1194 if (GET_CODE (from) == BARRIER)
1195 found_barrier = from;
1197 /* Count the length of this insn - we assume that all moves will
be 2 bytes long, except the DImode/DFmode moves. */
1200 if (broken_move (from))
1202 rtx src = SET_SRC (PATTERN (from));
1208 inc = (GET_MODE_SIZE (GET_MODE (src)) > 4) ? 4 : 2;
1211 inc = get_attr_length (from);
1213 if (GET_CODE (from) == INSN
1214 && GET_CODE (PATTERN (from)) == SET
1215 && GET_CODE (SET_SRC (PATTERN (from))) == UNSPEC
1216 && XINT (SET_SRC (PATTERN (from)), 1) == 1)
1218 else if (GET_CODE (from) == JUMP_INSN
1219 && (GET_CODE (PATTERN (from)) == ADDR_VEC
1220 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
1227 from = NEXT_INSN (from);
1230 /* Insert the constant pool table before the mova instruction, to prevent
1231 the mova label reference from going out of range. */
1235 if (! found_barrier)
1237 /* We didn't find a barrier in time to dump our stuff,
1238 so we'll make one. */
1239 rtx label = gen_label_rtx ();
1241 /* We went one instruction too far above. */
1242 from = PREV_INSN (from);
1243 /* Walk back to be just before any jump or label.
1244 Putting it before a label reduces the number of times the branch
1245 around the constant pool table will be hit. Putting it before
1246 a jump makes it more likely that the bra delay slot will be
1248 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
1249 || GET_CODE (from) == CODE_LABEL)
1250 from = PREV_INSN (from);
1252 from = emit_jump_insn_after (gen_jump (label), from);
1253 JUMP_LABEL (from) = label;
1254 found_barrier = emit_barrier_after (from);
1255 emit_label_after (label, found_barrier);
1258 return found_barrier;
1261 /* Exported to toplev.c.
1263 Scan the function looking for move instructions which have to be changed to
1264 pc-relative loads and insert the literal tables. */
1267 machine_dependent_reorg (first)
1272 for (insn = first; insn; insn = NEXT_INSN (insn))
1274 if (broken_move (insn))
1277 /* Scan ahead looking for a barrier to stick the constant table
1279 rtx barrier = find_barrier (insn);
1281 /* Now find all the moves between the points and modify them. */
1282 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
1284 if (broken_move (scan))
1286 rtx pat = PATTERN (scan);
1287 rtx src = SET_SRC (pat);
1288 rtx dst = SET_DEST (pat);
1289 enum machine_mode mode = GET_MODE (dst);
1294 if (mode == SImode && hi_const (src))
1299 while (GET_CODE (dst) == SUBREG)
1301 offset += SUBREG_WORD (dst);
1302 dst = SUBREG_REG (dst);
1304 dst = gen_rtx (REG, HImode, REGNO (dst) + offset);
1307 lab = add_constant (src, mode);
1308 newsrc = gen_rtx (MEM, mode,
1309 gen_rtx (LABEL_REF, VOIDmode, lab));
1310 RTX_UNCHANGING_P (newsrc) = 1;
1311 newinsn = emit_insn_after (gen_rtx (SET, VOIDmode,
1312 dst, newsrc), scan);
1318 dump_table (barrier);
1323 /* Dump out instruction addresses, which is useful for debugging the
1324 constant pool table stuff. */
1326 /* ??? This is unnecessary, and probably should be deleted. This makes
1327 the insn_addresses declaration above unnecessary. */
1329 /* ??? The addresses printed by this routine for insns are nonsense for
1330 insns which are inside of a sequence where none of the inner insns have
1331 variable length. This is because the second pass of shorten_branches
1332 does not bother to update them. */
1335 final_prescan_insn (insn, opvec, noperands)
1340 if (TARGET_DUMPISIZE)
1341 fprintf (asm_out_file, "\n! at %04x\n", insn_addresses[INSN_UID (insn)]);
1344 /* Dump out any constants accumulated in the final pass. These will
only be labels. */
1348 output_jump_label_table ()
1354 fprintf (asm_out_file, "\t.align 2\n");
1355 for (i = 0; i < pool_size; i++)
1357 pool_node *p = &pool_vector[i];
1359 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
1360 CODE_LABEL_NUMBER (p->label));
1361 output_asm_insn (".long %O0", &p->value);
1369 /* A full frame looks like:
1373 [ if current_function_anonymous_args
1386 local-0 <- fp points here. */
1388 /* Number of bytes pushed for anonymous args, used to pass information
1389 between expand_prologue and expand_epilogue. */
1391 static int extra_push;
1393 /* Adjust the stack and return the number of bytes taken to do it. */
1396 output_stack_adjust (size)
1401 rtx val = GEN_INT (size);
1404 if (! CONST_OK_FOR_I (size))
1406 rtx reg = gen_rtx (REG, SImode, 3);
1407 emit_insn (gen_movsi (reg, val));
1411 insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, val);
1416 /* Output RTL to push register RN onto the stack. */
1423 x = emit_insn (gen_push (gen_rtx (REG, SImode, rn)));
1424 REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
1425 gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
1428 /* Output RTL to pop register RN from the stack. */
1435 x = emit_insn (gen_pop (gen_rtx (REG, SImode, rn)));
1436 REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
1437 gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
1440 /* Generate code to push the regs specified in the mask, and return
1441 the number of bytes the insns take. */
1449 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1450 if (mask & (1 << i))
1454 /* Work out the registers which need to be saved, both as a mask and a
count.  The mask is the return value, with bit N set for hard register N.
NOTE(review): the count is presumably stored through COUNT_PTR; the
lines that do so are not part of this listing -- confirm.
1457 If doing a pragma interrupt function, then push all regs used by the
1458 function, and if we call another function (we can tell by looking at PR),
1459 make sure that all the regs it clobbers are safe too. */
1462 calc_live_regs (count_ptr)
1466 int live_regs_mask = 0;
1469 for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
/* The interrupt rule applies only when pragma_interrupt is set and
   pragma_trapa is clear; otherwise the normal rule below is used. */
1471 if (pragma_interrupt && ! pragma_trapa)
1473 /* Need to save all the regs ever live. */
/* A register also qualifies if it is call-used and PR has been live.
   The stack pointer, arg pointer, T and GBR are never included. */
1474 if ((regs_ever_live[reg]
1475 || (call_used_regs[reg] && regs_ever_live[PR_REG]))
1476 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
1477 && reg != T_REG && reg != GBR_REG)
1479 live_regs_mask |= 1 << reg;
1485 /* Only push those regs which are used and need to be saved. */
1486 if (regs_ever_live[reg] && ! call_used_regs[reg])
1488 live_regs_mask |= (1 << reg);
1495 return live_regs_mask;
1498 /* Code to generate prologue and epilogue sequences */
1501 sh_expand_prologue ()
/* Order of the visible steps: compute the save mask, make room for
   pretend args, handle anonymous args, push the saved registers,
   allocate the local frame, and finally set up the frame pointer if
   one is needed. */
1505 live_regs_mask = calc_live_regs (&d);
1507 /* We have pretend args if we had an object sent partially in registers
1508 and partially on the stack, e.g. a large structure. */
1509 output_stack_adjust (-current_function_pretend_args_size);
1513 /* This is set by SETUP_VARARGS to indicate that this is a varargs
1514 routine. Clear it here so that the next function isn't affected. */
1515 if (current_function_anonymous_args)
1517 current_function_anonymous_args = 0;
1519 /* Push arg regs as if they'd been provided by caller in stack. */
/* RN counts down from the highest-numbered parameter register as I
   counts up. */
1520 for (i = 0; i < NPARM_REGS; i++)
1522 int rn = NPARM_REGS + FIRST_PARM_REG - i - 1;
1523 if (i > NPARM_REGS - current_function_args_info)
1529 push_regs (live_regs_mask);
/* The frame size is negated to pass as the stack adjustment. */
1530 output_stack_adjust (-get_frame_size ());
1532 if (frame_pointer_needed)
1533 emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
1537 sh_expand_epilogue ()
/* The save mask is recomputed here with calc_live_regs rather than
   being carried over from the prologue. */
1542 live_regs_mask = calc_live_regs (&d);
/* With a frame pointer, the stack pointer is first reloaded from it. */
1544 if (frame_pointer_needed)
1545 emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
/* Release the local frame. */
1547 output_stack_adjust (get_frame_size ());
1549 /* Pop all the registers. */
/* J runs from the highest register number down to 0, the reverse of
   the increasing order used by push_regs. */
1551 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1553 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
1554 if (live_regs_mask & (1 << j))
/* Finally release the anonymous-arg bytes and the pretend-arg area. */
1558 output_stack_adjust (extra_push + current_function_pretend_args_size);
1561 /* Clear variables at function end. */
1564 function_epilogue (stream, size)
/* Reset both pragma flags so a #pragma interrupt or #pragma trapa does
   not carry over to the next function. */
1568 pragma_interrupt = pragma_trapa = 0;
1571 /* Define the offset between two registers, one to be eliminated, and
1572 the other its replacement, at the start of a routine.
FROM and TO are compared against ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM
and STACK_POINTER_REGNUM. */
1575 initial_elimination_offset (from, to)
1580 int total_saved_regs_space;
1581 int total_auto_space = get_frame_size ();
/* Pass the address of regs_saved to calc_live_regs; the space for saved
   registers is then taken as 4 bytes per register.
   NOTE(review): assumes calc_live_regs stores the saved-register count
   through its pointer argument -- confirm. */
1583 calc_live_regs (&regs_saved);
1584 total_saved_regs_space = (regs_saved) * 4;
/* The arg pointer lies above both the saved registers and the locals,
   whether it is replaced by the frame pointer or the stack pointer. */
1586 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
1587 return total_saved_regs_space + total_auto_space;
1589 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
1590 return total_saved_regs_space + total_auto_space;
1592 /* Initial gap between fp and sp is 0. */
1593 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
1599 /* Handle machine specific pragmas to be semi-compatible with the Hitachi compiler. */
1603 handle_pragma (file)
/* Skip leading blanks and tabs. */
1611 while (c == ' ' || c == '\t')
/* A newline or end of file at this point means there is no pragma name. */
1614 if (c == '\n' || c == EOF)
/* Collect the rest of the line, leaving one byte of pbuf spare. */
1617 while (psize < sizeof (pbuf) - 1 && c != '\n')
/* The collected text must be exactly "interrupt" or "trapa" to match. */
1620 if (psize == 9 && strncmp (pbuf, "interrupt", 9) == 0)
1622 pragma_interrupt = 1;
1625 if (psize == 5 && strncmp (pbuf, "trapa", 5) == 0)
/* trapa sets both flags. */
1627 pragma_interrupt = pragma_trapa = 1;
1635 /* Predicates used by the templates. */
1637 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
1638 Used only in general_movsrc_operand, which calls it on the register
inside a SUBREG. */
1641 system_reg_operand (op, mode)
1643 enum machine_mode mode;
1655 /* Returns 1 if OP can be source of a simple move operation.
1656 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
1657 invalid as are subregs of system registers. */
1660 general_movsrc_operand (op, mode)
1662 enum machine_mode mode;
1664 if (GET_CODE (op) == MEM)
/* Examine the address, looking through a CONST wrapper if present. */
1666 rtx inside = XEXP (op, 0);
1667 if (GET_CODE (inside) == CONST)
1668 inside = XEXP (inside, 0);
/* Two address forms are singled out: a bare LABEL_REF, and a LABEL_REF
   plus a CONST_INT. */
1670 if (GET_CODE (inside) == LABEL_REF)
1673 if (GET_CODE (inside) == PLUS
1674 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
1675 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
1678 /* Only post inc allowed. */
1679 if (GET_CODE (inside) == PRE_DEC)
/* QImode and HImode subregs of a system register (see
   system_reg_operand) are singled out here. */
1683 if ((mode == QImode || mode == HImode)
1684 && (GET_CODE (op) == SUBREG
1685 && GET_CODE (XEXP (op, 0)) == REG
1686 && system_reg_operand (XEXP (op, 0), mode)))
/* Everything else is decided by general_operand. */
1689 return general_operand (op, mode);
1692 /* Returns 1 if OP can be a destination of a move.
1693 Same as general_operand, except that a MEM whose address is a
POST_INC is singled out by the check below. */
1696 general_movdst_operand (op, mode)
1698 enum machine_mode mode;
1700 /* Only pre dec allowed. */
1701 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
1704 return general_operand (op, mode);
1707 /* Returns 1 if OP is a normal arithmetic register. */
1710 arith_reg_operand (op, mode)
1712 enum machine_mode mode;
/* OP must first satisfy register_operand. */
1714 if (register_operand (op, mode))
/* A REG passes only if it is none of T, PR, MACH and MACL. */
1716 if (GET_CODE (op) == REG)
1717 return (REGNO (op) != T_REG
1718 && REGNO (op) != PR_REG
1719 && REGNO (op) != MACH_REG
1720 && REGNO (op) != MACL_REG);
1726 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
1729 arith_operand (op, mode)
1731 enum machine_mode mode;
/* Two cases are tested: an arithmetic register, or a CONST_INT whose
   value satisfies CONST_OK_FOR_I. */
1733 if (arith_reg_operand (op, mode))
1736 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
1742 /* Returns 1 if OP is a valid source operand for a compare insn. */
1745 arith_reg_or_0_operand (op, mode)
1747 enum machine_mode mode;
/* Two cases are tested: an arithmetic register, or a CONST_INT whose
   value satisfies CONST_OK_FOR_N. */
1749 if (arith_reg_operand (op, mode))
1752 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
1758 /* Returns 1 if OP is a valid source operand for a logical operation. */
1761 logical_operand (op, mode)
1763 enum machine_mode mode;
/* Two cases are tested: an arithmetic register, or a CONST_INT whose
   value satisfies CONST_OK_FOR_L. */
1765 if (arith_reg_operand (op, mode))
1768 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
1774 /* Determine where to put an argument to a function.
1775 Value is zero to push the argument on the stack,
1776 or a hard register in which to store the argument.
1778 MODE is the argument's machine mode.
1779 TYPE is the data type of the argument (as a tree).
1780 This is null for libcalls where that information may
not be available.
1782 CUM is a variable of type CUMULATIVE_ARGS which gives info about
1783 the preceding args and about the function being called.
1784 NAMED is nonzero if this argument is a named parameter
1785 (otherwise it is an extra parameter matching an ellipsis). */
1788 sh_function_arg (cum, mode, type, named)
1789 CUMULATIVE_ARGS cum;
1790 enum machine_mode mode;
/* ROUND_REG turns CUM and MODE into the index of the parameter register
   this argument would start in. */
1796 int rr = (ROUND_REG (cum, mode));
/* With a register still available, the result is that register in MODE,
   unless TYPE is present and TREE_ADDRESSABLE, in which case it is 0. */
1798 if (rr < NPARM_REGS)
1799 return ((type == 0 || ! TREE_ADDRESSABLE (type))
1800 ? gen_rtx (REG, mode, FIRST_PARM_REG + rr) : 0);
1805 /* For an arg passed partly in registers and partly in memory,
1806 this is the number of registers used.
1807 For args passed entirely in registers or entirely in memory, zero.
1808 Any arg that starts in the first 4 regs but won't entirely fit in them
1809 needs partial registers on the SH. */
1812 sh_function_arg_partial_nregs (cum, mode, type, named)
1813 CUMULATIVE_ARGS cum;
1814 enum machine_mode mode;
/* Only an argument that starts in a parameter register can be split. */
1818 if (cum < NPARM_REGS)
/* The argument's size comes from TYPE for BLKmode and from MODE
   otherwise, rounded by ROUND_ADVANCE.  If CUM plus that size runs past
   NPARM_REGS, and TYPE is absent or not TREE_ADDRESSABLE, the registers
   remaining from CUM are the partial count. */
1820 if ((type == 0 || ! TREE_ADDRESSABLE (type))
1821 && (cum + (mode == BLKmode
1822 ? ROUND_ADVANCE (int_size_in_bytes (type))
1823 : ROUND_ADVANCE (GET_MODE_SIZE (mode))) - NPARM_REGS > 0))
1824 return NPARM_REGS - cum;