/* Output routines for GCC for Hitachi Super-H.
- Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
+ Copyright (C) 1993-1998 Free Software Foundation, Inc.
This file is part of GNU CC.
#include "flags.h"
#include "insn-flags.h"
#include "expr.h"
+#include "function.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "output.h"
#include "insn-attr.h"
+int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
+
#define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
#define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
output code for the next function appropriate for an interrupt handler. */
int pragma_interrupt;
+/* This is set by the trap_exit attribute for functions. It specifies
+ a trap number to be used in a trapa instruction at function exit
+ (instead of an rte instruction). */
+int trap_exit;
+
+/* This is used by the sp_switch attribute for functions. It specifies
+ a variable holding the address of the stack the interrupt function
+ should switch to/from at entry/exit. */
+rtx sp_switch;
+
/* This is set by #pragma trapa, and is similar to the above, except that
the compiler doesn't emit code to preserve all registers. */
static int pragma_trapa;
rtx sh_compare_op0;
rtx sh_compare_op1;
+enum machine_mode sh_addr_diff_vec_mode;
+
/* Provides the class number of the smallest class containing
reg number. */
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
GENERAL_REGS, PR_REGS, T_REGS, NO_REGS,
- MAC_REGS, MAC_REGS, FPUL_REGS, NO_REGS,
+ MAC_REGS, MAC_REGS, FPUL_REGS, GENERAL_REGS,
FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
FP_REGS, FP_REGS, FP_REGS, FP_REGS,
+ DF_REGS, DF_REGS, DF_REGS, DF_REGS,
+ DF_REGS, DF_REGS, DF_REGS, DF_REGS,
+ FPSCR_REGS,
+};
+
+char fp_reg_names[][5] =
+{
+ "fr0", "fr1", "fr2", "fr3", "fr4", "fr5", "fr6", "fr7",
+ "fr8", "fr9", "fr10", "fr11", "fr12", "fr13", "fr14", "fr15",
+ "fpul",
+ "xd0","xd2","xd4", "xd6", "xd8", "xd10", "xd12", "xd14",
};
/* Provide reg_class from a letter such as appears in the machine
enum reg_class reg_class_from_letter[] =
{
- /* a */ NO_REGS, /* b */ NO_REGS, /* c */ NO_REGS, /* d */ NO_REGS,
+ /* a */ ALL_REGS, /* b */ NO_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
/* e */ NO_REGS, /* f */ FP_REGS, /* g */ NO_REGS, /* h */ NO_REGS,
/* i */ NO_REGS, /* j */ NO_REGS, /* k */ NO_REGS, /* l */ PR_REGS,
/* m */ NO_REGS, /* n */ NO_REGS, /* o */ NO_REGS, /* p */ NO_REGS,
/* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
/* y */ FPUL_REGS, /* z */ R0_REGS
};
+
+int assembler_dialect;
+
+rtx get_fpscr_rtx ();
+void emit_sf_insn ();
+void emit_df_insn ();
+
+static void split_branches PROTO ((rtx));
\f
/* Print the operand address in x to the stream. */
switch (GET_CODE (x))
{
case REG:
- fprintf (stream, "@%s", reg_names[REGNO (x)]);
+ case SUBREG:
+ fprintf (stream, "@%s", reg_names[true_regnum (x)]);
break;
case PLUS:
{
case CONST_INT:
fprintf (stream, "@(%d,%s)", INTVAL (index),
- reg_names[REGNO (base)]);
+ reg_names[true_regnum (base)]);
break;
case REG:
- fprintf (stream, "@(r0,%s)",
- reg_names[MAX (REGNO (base), REGNO (index))]);
- break;
+ case SUBREG:
+ {
+ int base_num = true_regnum (base);
+ int index_num = true_regnum (index);
+
+ fprintf (stream, "@(r0,%s)",
+ reg_names[MAX (base_num, index_num)]);
+ break;
+ }
default:
debug_rtx (x);
break;
case PRE_DEC:
- fprintf (stream, "@-%s", reg_names[REGNO (XEXP (x, 0))]);
+ fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
break;
case POST_INC:
- fprintf (stream, "@%s+", reg_names[REGNO (XEXP (x, 0))]);
+ fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
break;
default:
according to modifier code.
'.' print a .s if insn needs delay slot
- '@' print rte or rts depending upon pragma interruptness
+ ',' print LOCAL_LABEL_PREFIX
+ '@' print trap, rte or rts depending upon pragma interruptness
'#' output a nop if there is nothing to put in the delay slot
'O' print a constant without the #
'R' print the LSW of a dp value - changes if in little endian
'S' print the MSW of a dp value - changes if in little endian
- 'T' print the next word of a dp value - same as 'R' in big endian mode. */
+ 'T' print the next word of a dp value - same as 'R' in big endian mode.
+ 'o' output an operator. */
void
print_operand (stream, x, code)
case '.':
if (final_sequence
&& ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- fprintf (stream, ".s");
+ fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
+ break;
+ case ',':
+ fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
break;
case '@':
- if (pragma_interrupt)
+ {
+ int interrupt_handler;
+
+ if ((lookup_attribute
+ ("interrupt_handler",
+ DECL_MACHINE_ATTRIBUTES (current_function_decl)))
+ != NULL_TREE)
+ interrupt_handler = 1;
+ else
+ interrupt_handler = 0;
+
+ if (trap_exit)
+ fprintf (stream, "trapa #%d", trap_exit);
+ else if (interrupt_handler)
fprintf (stream, "rte");
else
fprintf (stream, "rts");
break;
+ }
case '#':
/* Output a nop if there's nothing in the delay slot. */
if (dbr_sequence_length () == 0)
fputs (reg_names[REGNO (x) + 1], (stream));
break;
case MEM:
- print_operand_address (stream,
- XEXP (adj_offsettable_operand (x, 4), 0));
+ if (GET_CODE (XEXP (x, 0)) != PRE_DEC
+ && GET_CODE (XEXP (x, 0)) != POST_INC)
+ x = adj_offsettable_operand (x, 4);
+ print_operand_address (stream, XEXP (x, 0));
break;
}
break;
+ case 'o':
+ switch (GET_CODE (x))
+ {
+ case PLUS: fputs ("add", stream); break;
+ case MINUS: fputs ("sub", stream); break;
+ case MULT: fputs ("mul", stream); break;
+ case DIV: fputs ("div", stream); break;
+ }
+ break;
default:
switch (GET_CODE (x))
{
case REG:
- fputs (reg_names[REGNO (x)], (stream));
+ if (REGNO (x) >= FIRST_FP_REG && REGNO (x) <= LAST_FP_REG
+ && GET_MODE_SIZE (GET_MODE (x)) > 4)
+ fprintf ((stream), "d%s", reg_names[REGNO (x)]+1);
+ else
+ fputs (reg_names[REGNO (x)], (stream));
break;
case MEM:
output_address (XEXP (x, 0));
}
}
\f
+static void force_into PROTO ((rtx, rtx));
+
+/* Like force_operand, but guarantees that VALUE ends up in TARGET. */
+static void
+force_into (value, target)
+ rtx value, target;
+{
+ value = force_operand (value, target);
+ if (! rtx_equal_p (value, target))
+ emit_insn (gen_move_insn (target, value));
+}
+
/* Emit code to perform a block move. Choose the best method.
OPERANDS[0] is the destination.
if (! constp || align < 4 || (bytes % 4 != 0))
return 0;
+ if (TARGET_HARD_SH4)
+ {
+ if (bytes < 12)
+ return 0;
+ else if (bytes == 12)
+ {
+ tree entry_name;
+ rtx func_addr_rtx;
+ rtx r4 = gen_rtx (REG, SImode, 4);
+ rtx r5 = gen_rtx (REG, SImode, 5);
+
+ entry_name = get_identifier ("__movstrSI12_i4");
+
+ func_addr_rtx
+ = copy_to_mode_reg (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (entry_name)));
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+ emit_insn (gen_block_move_real_i4 (func_addr_rtx));
+ return 1;
+ }
+ else if (! TARGET_SMALLCODE)
+ {
+ tree entry_name;
+ rtx func_addr_rtx;
+ int dwords;
+ rtx r4 = gen_rtx (REG, SImode, 4);
+ rtx r5 = gen_rtx (REG, SImode, 5);
+ rtx r6 = gen_rtx (REG, SImode, 6);
+
+ entry_name = get_identifier (bytes & 4
+ ? "__movstr_i4_odd"
+ : "__movstr_i4_even");
+ func_addr_rtx
+ = copy_to_mode_reg (Pmode,
+ gen_rtx_SYMBOL_REF (Pmode,
+ IDENTIFIER_POINTER (entry_name)));
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
+
+ dwords = bytes >> 3;
+ emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
+ emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
+ return 1;
+ }
+ else
+ return 0;
+ }
if (bytes < 64)
{
char entry[30];
= copy_to_mode_reg (Pmode,
gen_rtx (SYMBOL_REF, Pmode,
IDENTIFIER_POINTER (entry_name)));
- emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
- emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
emit_insn (gen_block_move_real (func_addr_rtx));
return 1;
}
= copy_to_mode_reg (Pmode,
gen_rtx (SYMBOL_REF, Pmode,
IDENTIFIER_POINTER (entry_name)));
- emit_insn (gen_move_insn (r4, XEXP (operands[0], 0)));
- emit_insn (gen_move_insn (r5, XEXP (operands[1], 0)));
+ force_into (XEXP (operands[0], 0), r4);
+ force_into (XEXP (operands[1], 0), r5);
/* r6 controls the size of the move. 16 is decremented from it
for each 64 bytes moved. Then the negative bit left over is used
rtx operands[];
enum machine_mode mode;
{
- /* Copy the source to a register if both operands aren't registers. */
- if (! reload_in_progress && ! reload_completed
- && ! register_operand (operands[0], mode)
- && ! register_operand (operands[1], mode))
- operands[1] = copy_to_mode_reg (mode, operands[1]);
+ if (! reload_in_progress && ! reload_completed)
+ {
+ /* Copy the source to a register if both operands aren't registers. */
+ if (! register_operand (operands[0], mode)
+ && ! register_operand (operands[1], mode))
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+
+ /* This case can happen while generating code to move the result
+ of a library call to the target. Reject `st r0,@(rX,rY)' because
+ reload will fail to find a spill register for rX, since r0 is already
+ being used for the source. */
+ else if (GET_CODE (operands[1]) == REG && REGNO (operands[1]) == 0
+ && GET_CODE (operands[0]) == MEM
+ && GET_CODE (XEXP (operands[0], 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
+ operands[1] = copy_to_mode_reg (mode, operands[1]);
+ }
return 0;
}
mode = GET_MODE (sh_compare_op1);
sh_compare_op0 = force_reg (mode, sh_compare_op0);
- if (code != EQ && code != NE
- && (sh_compare_op1 != const0_rtx
- || code == GTU || code == GEU || code == LTU || code == LEU))
+ if ((code != EQ && code != NE
+ && (sh_compare_op1 != const0_rtx
+ || code == GTU || code == GEU || code == LTU || code == LEU))
+ || TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT)
sh_compare_op1 = force_reg (mode, sh_compare_op1);
- /* ??? This should be `mode' not `SImode' in the compare, but that would
- require fixing the branch patterns too. */
- emit_insn (gen_rtx (SET, VOIDmode, t_reg,
- gen_rtx (code, SImode, sh_compare_op0,
- sh_compare_op1)));
+ if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ (mode == SFmode ? emit_sf_insn : emit_df_insn)
+ (gen_rtx (PARALLEL, VOIDmode, gen_rtvec (2,
+ gen_rtx (SET, VOIDmode, t_reg,
+ gen_rtx (code, SImode,
+ sh_compare_op0, sh_compare_op1)),
+ gen_rtx (USE, VOIDmode, get_fpscr_rtx ()))));
+ else
+ emit_insn (gen_rtx (SET, VOIDmode, t_reg,
+ gen_rtx (code, SImode, sh_compare_op0,
+ sh_compare_op1)));
return t_reg;
}
rtx *operands;
int code;
{
- if (code != EQ && code != NE)
+ enum machine_mode mode = GET_MODE (sh_compare_op0);
+ rtx insn;
+ if (mode == VOIDmode)
+ mode = GET_MODE (sh_compare_op1);
+ if (code != EQ
+ || mode == DImode
+ || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
{
- enum machine_mode mode = GET_MODE (sh_compare_op0);
- if (mode == VOIDmode)
- mode = GET_MODE (sh_compare_op1);
-
/* Force args into regs, since we can't use constants here. */
sh_compare_op0 = force_reg (mode, sh_compare_op0);
if (sh_compare_op1 != const0_rtx
- || code == GTU || code == GEU || code == LTU || code == LEU)
+ || code == GTU || code == GEU
+ || (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT))
sh_compare_op1 = force_reg (mode, sh_compare_op1);
}
- operands[1] = sh_compare_op0;
- operands[2] = sh_compare_op1;
+ if (TARGET_SH3E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
+ {
+ from_compare (operands, GT);
+ insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
+ }
+ else
+ insn = gen_rtx (SET, VOIDmode,
+ gen_rtx (REG, SImode, 18),
+ gen_rtx (code, SImode, sh_compare_op0, sh_compare_op1));
+ if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
+ {
+ insn = gen_rtx (PARALLEL, VOIDmode,
+ gen_rtvec (2, insn,
+ gen_rtx (USE, VOIDmode, get_fpscr_rtx ())));
+ (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
+ }
+ else
+ emit_insn (insn);
}
\f
/* Functions to output assembly code. */
INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
}
-/* We can't tell if we need a register as a scratch for the jump
- until after branch shortening, and then it's too late to allocate a
- register the 'proper' way. These instruction sequences are rare
- anyway, so to avoid always using a reg up from our limited set, we'll
- grab one when we need one on output. */
-
-/* ??? Should fix compiler so that using a clobber scratch in jump
- instructions works, and then this will be unnecessary. */
-
char *
output_far_jump (insn, op)
rtx insn;
rtx op;
{
- rtx thislab = gen_label_rtx ();
+ struct { rtx lab, reg, op; } this;
+ char *jump;
+ int far;
+ int offset = branch_dest (insn) - insn_addresses[INSN_UID (insn)];
- /* Output the delay slot insn first if any. */
- if (dbr_sequence_length ())
- print_slot (final_sequence);
+ this.lab = gen_label_rtx ();
- output_asm_insn ("mov.l r13,@-r15", 0);
- output_asm_insn ("mov.l %O0,r13", &thislab);
- output_asm_insn ("jmp @r13", 0);
- output_asm_insn ("mov.l @r15+,r13", 0);
- output_asm_insn (".align 2", 0);
- ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (thislab));
- output_asm_insn (".long %O0", &op);
+ if (TARGET_SH2
+ && offset >= -32764
+ && offset - get_attr_length (insn) <= 32766)
+ {
+ far = 0;
+ jump = "mov.w %O0,%1;braf %1";
+ }
+ else
+ {
+ far = 1;
+ jump = "mov.l %O0,%1;jmp @%1";
+ }
+ /* If we have a scratch register available, use it. */
+ if (GET_CODE (PREV_INSN (insn)) == INSN
+ && INSN_CODE (PREV_INSN (insn)) == CODE_FOR_indirect_jump_scratch)
+ {
+ this.reg = SET_DEST (PATTERN (PREV_INSN (insn)));
+ output_asm_insn (jump, &this.lab);
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+ else
+ output_asm_insn ("nop", 0);
+ }
+ else
+ {
+ /* Output the delay slot insn first if any. */
+ if (dbr_sequence_length ())
+ print_slot (final_sequence);
+
+ this.reg = gen_rtx (REG, SImode, 13);
+ output_asm_insn ("mov.l r13,@-r15", 0);
+ output_asm_insn (jump, &this.lab);
+ output_asm_insn ("mov.l @r15+,r13", 0);
+ }
+ if (far)
+ output_asm_insn (".align 2", 0);
+ ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
+ this.op = op;
+ output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
return "";
}
rtx insn;
rtx *operands;
{
- int label = lf++;
-
switch (get_attr_length (insn))
{
- case 2:
- /* A branch with an unfilled delay slot. */
- case 4:
- /* Simple branch in range -252..+258 bytes */
- return logic ? "bt%. %l0" : "bf%. %l0";
-
case 6:
- /* A branch with an unfilled delay slot. */
- case 8:
- /* Branch in range -4092..+4098 bytes. */
- {
- /* The call to print_slot will clobber the operands. */
- rtx op0 = operands[0];
-
- /* If the instruction in the delay slot is annulled (true), then
- there is no delay slot where we can put it now. The only safe
- place for it is after the label. */
-
- if (final_sequence)
- {
- fprintf (asm_out_file, "\tb%c%s\tLF%d\n", logic ? 'f' : 't',
- INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
- ? "" : ".s", label);
- if (! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- print_slot (final_sequence);
- }
- else
- fprintf (asm_out_file, "\tb%c\tLF%d\n", logic ? 'f' : 't', label);
-
- output_asm_insn ("bra %l0", &op0);
- fprintf (asm_out_file, "\tnop\n");
- fprintf (asm_out_file, "LF%d:\n", label);
-
- if (final_sequence
- && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- print_slot (final_sequence);
- }
- return "";
-
- case 16:
- /* A branch with an unfilled delay slot. */
- case 18:
- /* Branches a long way away. */
- {
- /* The call to print_slot will clobber the operands. */
- rtx op0 = operands[0];
-
- /* If the instruction in the delay slot is annulled (true), then
- there is no delay slot where we can put it now. The only safe
- place for it is after the label. */
-
- if (final_sequence)
- {
- fprintf (asm_out_file, "\tb%c%s\tLF%d\n", logic ? 'f' : 't',
- INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
- ? "" : ".s", label);
- if (! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
+ /* This can happen if filling the delay slot has caused a forward
+ branch to exceed its range (we could reverse it, but only
+ when we know we won't overextend other branches; this should
+ best be handled by relaxation).
+ It can also happen when other condbranches hoist delay slot insn
+ from their destination, thus leading to code size increase.
+ But the branch will still be in the range -4092..+4098 bytes. */
+
+ if (! TARGET_RELAX)
+ {
+ int label = lf++;
+ /* The call to print_slot will clobber the operands. */
+ rtx op0 = operands[0];
+
+ /* If the instruction in the delay slot is annulled (true), then
+ there is no delay slot where we can put it now. The only safe
+ place for it is after the label. final will do that by default. */
+
+ if (final_sequence
+ && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
+ {
+ asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
+ ASSEMBLER_DIALECT ? "/" : ".", label);
print_slot (final_sequence);
- }
- else
- fprintf (asm_out_file, "\tb%c\tLF%d\n", logic ? 'f' : 't', label);
-
- output_far_jump (insn, op0);
- fprintf (asm_out_file, "LF%d:\n", label);
-
- if (final_sequence
- && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
- print_slot (final_sequence);
- }
- return "";
+ }
+ else
+ asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
+
+ output_asm_insn ("bra\t%l0", &op0);
+ fprintf (asm_out_file, "\tnop\n");
+ ASM_OUTPUT_INTERNAL_LABEL(asm_out_file, "LF", label);
+
+ return "";
+ }
+ /* When relaxing, handle this like a short branch. The linker
+ will fix it up if it still doesn't fit after relaxation. */
+ case 2:
+ return logic ? "bt%.\t%l0" : "bf%.\t%l0";
+ default:
+ abort ();
}
- return "bad";
}
-\f
-/* A copy of the option structure defined in toplev.c. */
-struct option
+char *
+output_branchy_insn (code, template, insn, operands)
+ char *template;
+ enum rtx_code code;
+ rtx insn;
+ rtx *operands;
{
- char *string;
- int *variable;
- int on_value;
-};
+ rtx next_insn = NEXT_INSN (insn);
+ int label_nr;
-/* Output a single output option string NAME to FILE, without generating
- lines longer than MAX. */
-
-static int
-output_option (file, sep, type, name, indent, pos, max)
- FILE *file;
- char *sep;
- char *type;
- char *name;
- char *indent;
- int pos;
- int max;
-{
- if (strlen (sep) + strlen (type) + strlen (name) + pos > max)
+ if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
{
- fprintf (file, indent);
- return fprintf (file, "%s%s", type, name);
+ rtx src = SET_SRC (PATTERN (next_insn));
+ if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
+ {
+ /* Following branch not taken */
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], next_insn);
+ return template;
+ }
+ else
+ {
+ int offset = (branch_dest (next_insn)
+ - insn_addresses[INSN_UID (next_insn)] + 4);
+ if (offset >= -252 && offset <= 258)
+ {
+ if (GET_CODE (src) == IF_THEN_ELSE)
+ /* branch_true */
+ src = XEXP (src, 1);
+ operands[9] = src;
+ return template;
+ }
+ }
}
- return pos + fprintf (file, "%s%s%s", sep, type, name);
+ operands[9] = gen_label_rtx ();
+ emit_label_after (operands[9], insn);
+ return template;
}
-/* A copy of the target_switches variable in toplev.c. */
-
-static struct
-{
- char *name;
- int value;
-} m_options[] = TARGET_SWITCHES;
-
-/* Output all options to the assembly language file. */
-
-static void
-output_options (file, f_options, f_len, W_options, W_len,
- pos, max, sep, indent, term)
- FILE *file;
- struct option *f_options;
- struct option *W_options;
- int f_len, W_len;
- int pos;
- int max;
- char *sep;
- char *indent;
- char *term;
+char *
+output_ieee_ccmpeq (insn, operands)
+ rtx insn, operands;
{
- register int j;
-
- if (optimize)
- pos = output_option (file, sep, "-O", "", indent, pos, max);
- if (write_symbols != NO_DEBUG)
- pos = output_option (file, sep, "-g", "", indent, pos, max);
- if (profile_flag)
- pos = output_option (file, sep, "-p", "", indent, pos, max);
- if (profile_block_flag)
- pos = output_option (file, sep, "-a", "", indent, pos, max);
-
- for (j = 0; j < f_len; j++)
- if (*f_options[j].variable == f_options[j].on_value)
- pos = output_option (file, sep, "-f", f_options[j].string,
- indent, pos, max);
-
- for (j = 0; j < W_len; j++)
- if (*W_options[j].variable == W_options[j].on_value)
- pos = output_option (file, sep, "-W", W_options[j].string,
- indent, pos, max);
-
- for (j = 0; j < sizeof m_options / sizeof m_options[0]; j++)
- if (m_options[j].name[0] != '\0'
- && m_options[j].value > 0
- && ((m_options[j].value & target_flags)
- == m_options[j].value))
- pos = output_option (file, sep, "-m", m_options[j].name,
- indent, pos, max);
-
- fprintf (file, term);
+ output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
}
-
+\f
/* Output to FILE the start of the assembler file. */
void
-output_file_start (file, f_options, f_len, W_options, W_len)
+output_file_start (file)
FILE *file;
- struct option *f_options;
- struct option *W_options;
- int f_len, W_len;
{
register int pos;
gcc2_compiled. symbol aren't in the text section. */
data_section ();
- pos = fprintf (file, "\n! Hitachi SH cc1 (%s) arguments:", version_string);
- output_options (file, f_options, f_len, W_options, W_len,
- pos, 75, " ", "\n! ", "\n\n");
-
if (TARGET_LITTLE_ENDIAN)
fprintf (file, "\t.little\n");
}
{16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
{16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
+/* Likewise, but for shift amounts < 16, up to three highmost bits
+ might be clobbered. This is typically used when combined with some
+ kind of sign or zero extension. */
+
+static char ext_shift_insns[] =
+ { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
+
+static short ext_shift_amounts[32][4] = {
+ {0}, {1}, {2}, {2, 1},
+ {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
+ {8}, {8, 1}, {8, 2}, {8, 1, 2},
+ {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
+ {16}, {16, 1}, {16, 2}, {16, 1, 2},
+ {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
+ {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
+ {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
+
+/* Assuming we have a value that has been sign-extended by at least one bit,
+ can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
+ to shift it by N without data loss, and quicker than by other means? */
+#define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
+
/* This is used in length attributes in sh.md to help compute the length
of arbitrary constant shift instructions. */
/* If shift by a non constant, then this will be expensive. */
if (GET_CODE (XEXP (x, 1)) != CONST_INT)
- {
- if (TARGET_SH3)
- return 2;
- /* If not an sh3 then we don't even have an instruction for it. */
- return 20;
- }
+ return SH_DYNAMIC_SHIFT_COST;
/* Otherwise, return the true cost in instructions. */
if (GET_CODE (x) == ASHIFTRT)
{
int cost = ashiftrt_insns[value];
/* If SH3, then we put the constant in a reg and use shad. */
- if (TARGET_SH3 && cost > 3)
- cost = 3;
+ if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
+ cost = 1 + SH_DYNAMIC_SHIFT_COST;
return cost;
}
else
}
}
+/* Same for HImode */
+
+void
+gen_ashift_hi (type, n, reg)
+ int type;
+ int n;
+ rtx reg;
+{
+ /* Negative values here come from the shift_amounts array. */
+ if (n < 0)
+ {
+ if (type == ASHIFT)
+ type = LSHIFTRT;
+ else
+ type = ASHIFT;
+ n = -n;
+ }
+
+ switch (type)
+ {
+ case ASHIFTRT:
+ case LSHIFTRT:
+ /* We don't have HImode right shift operations because using the
+ ordinary 32 bit shift instructions for that doesn't generate proper
+ zero/sign extension.
+ gen_ashift_hi is only called in contexts where we know that the
+ sign extension works out correctly. */
+ {
+ int word = 0;
+ if (GET_CODE (reg) == SUBREG)
+ {
+ word = SUBREG_WORD (reg);
+ reg = SUBREG_REG (reg);
+ }
+ gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, word));
+ break;
+ }
+ case ASHIFT:
+ emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
+ break;
+ }
+}
+
/* Output RTL to split a constant shift into its component SH constant
shift instructions. */
for (i = 0; i < max; i++)
gen_ashift (code, shift_amounts[value][i], operands[0]);
}
+
+/* Same as above, but optimized for values where the topmost bits don't
+ matter. */
+
+int
+gen_shifty_hi_op (code, operands)
+ int code;
+ rtx *operands;
+{
+ int value = INTVAL (operands[2]);
+ int max, i;
+ void (*gen_fun)();
+
+ /* This operation is used by and_shl for SImode values with a few
+ high bits known to be cleared. */
+ value &= 31;
+ if (value == 0)
+ {
+ emit_insn (gen_nop ());
+ return;
+ }
+
+ gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
+ if (code == ASHIFT)
+ {
+ max = ext_shift_insns[value];
+ for (i = 0; i < max; i++)
+ gen_fun (code, ext_shift_amounts[value][i], operands[0]);
+ }
+ else
+ /* When shifting right, emit the shifts in reverse order, so that
+ solitary negative values come first. */
+ for (i = ext_shift_insns[value] - 1; i >= 0; i--)
+ gen_fun (code, ext_shift_amounts[value][i], operands[0]);
+}
/* Output RTL for an arithmetic right shift. */
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
return 1;
}
- else if (ashiftrt_insns[INTVAL (operands[2])] > 3)
+ else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
+ > 1 + SH_DYNAMIC_SHIFT_COST)
{
- rtx count = force_reg (SImode, GEN_INT (- INTVAL (operands[2])));
+ rtx count
+ = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
return 1;
}
if (GET_CODE (operands[2]) != CONST_INT)
return 0;
- value = INTVAL (operands[2]);
+ value = INTVAL (operands[2]) & 31;
if (value == 31)
{
emit_move_insn (operands[0], gen_rtx (REG, SImode, 4));
return 1;
}
-\f
-/* The SH cannot load a large constant into a register, constants have to
- come from a pc relative load. The reference of a pc relative load
- instruction must be less than 1k infront of the instruction. This
- means that we often have to dump a constant inside a function, and
- generate code to branch around it.
- It is important to minimize this, since the branches will slow things
- down and make things bigger.
+int sh_dynamicalize_shift_p (count)
+ rtx count;
+{
+ return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
+}
- Worst case code looks like:
+/* Try to find a good way to implement the combiner pattern
+ [(set (match_operand:SI 0 "register_operand" "r")
+ (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n"))
+ (match_operand:SI 3 "const_int_operand" "n"))) .
+ LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
+ return 0 for simple right / left or left/right shift combination.
+ return 1 for a combination of shifts with zero_extend.
+ return 2 for a combination of shifts with an AND that needs r0.
+ return 3 for a combination of shifts with an AND that needs an extra
+ scratch register, when the three highmost bits of the AND mask are clear.
+ return 4 for a combination of shifts with an AND that needs an extra
+ scratch register, when any of the three highmost bits of the AND mask
+ is set.
+ If ATTRP is set, store an initial right shift width in ATTRP[0],
+ and the instruction length in ATTRP[1] . These values are not valid
+ when returning 0.
+ When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
+ shift_amounts for the last shift value that is to be used before the
+ sign extend. */
+int
+shl_and_kind (left_rtx, mask_rtx, attrp)
+ rtx left_rtx, mask_rtx;
+ int *attrp;
+{
+ unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
+ int left = INTVAL (left_rtx), right;
+ int best = 0;
+ int cost, best_cost = 10000;
+ int best_right = 0, best_len = 0;
+ int i;
+ int can_ext;
- mov.l L1,rn
- bra L2
- nop
- align
- L1: .long value
- L2:
- ..
+ if (left < 0 || left > 31)
+ return 0;
+ if (GET_CODE (mask_rtx) == CONST_INT)
+ mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
+ else
+ mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
+ /* Can this be expressed as a right shift / left shift pair ? */
+ lsb = ((mask ^ (mask - 1)) >> 1) + 1;
+ right = exact_log2 (lsb);
+ mask2 = ~(mask + lsb - 1);
+ lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
+ /* mask has no zeroes but trailing zeroes <==> ! mask2 */
+ if (! mask2)
+ best_cost = shift_insns[right] + shift_insns[right + left];
+ /* mask has no trailing zeroes <==> ! right */
+ else if (! right && mask2 == ~(lsb2 - 1))
+ {
+ int late_right = exact_log2 (lsb2);
+ best_cost = shift_insns[left + late_right] + shift_insns[late_right];
+ }
+ /* Try to use zero extend */
+ if (mask2 == ~(lsb2 - 1))
+ {
+ int width, first;
- mov.l L3,rn
- bra L4
- nop
- align
- L3: .long value
- L4:
- ..
+ for (width = 8; width <= 16; width += 8)
+ {
+ /* Can we zero-extend right away? */
+ if (lsb2 == (HOST_WIDE_INT)1 << width)
+ {
+ cost
+ = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
+ if (cost < best_cost)
+ {
+ best = 1;
+ best_cost = cost;
+ best_right = right;
+ best_len = cost;
+ if (attrp)
+ attrp[2] = -1;
+ }
+ continue;
+ }
+ /* ??? Could try to put zero extend into initial right shift,
+ or even shift a bit left before the right shift. */
+ /* Determine value of first part of left shift, to get to the
+ zero extend cut-off point. */
+ first = width - exact_log2 (lsb2) + right;
+ if (first >= 0 && right + left - first >= 0)
+ {
+ cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
+ + ext_shift_insns[right + left - first];
+ if (cost < best_cost)
+ {
+ best = 1;
+ best_cost = cost;
+ best_right = right;
+ best_len = cost;
+ if (attrp)
+ attrp[2] = first;
+ }
+ }
+ }
+ }
+ /* Try to use r0 AND pattern */
+ for (i = 0; i <= 2; i++)
+ {
+ if (i > right)
+ break;
+ if (! CONST_OK_FOR_L (mask >> i))
+ continue;
+ cost = (i != 0) + 2 + ext_shift_insns[left + i];
+ if (cost < best_cost)
+ {
+ best = 2;
+ best_cost = cost;
+ best_right = i;
+ best_len = cost - 1;
+ }
+ }
+ /* Try to use a scratch register to hold the AND operand. */
+ can_ext = ((mask << left) & 0xe0000000) == 0;
+ for (i = 0; i <= 2; i++)
+ {
+ if (i > right)
+ break;
+ cost = (i != 0) + (CONST_OK_FOR_I (mask >> i) ? 2 : 3)
+ + (can_ext ? ext_shift_insns : shift_insns)[left + i];
+ if (cost < best_cost)
+ {
+ best = 4 - can_ext;
+ best_cost = cost;
+ best_right = i;
+ best_len = cost - 1 - ! CONST_OK_FOR_I (mask >> i);
+ }
+ }
- We fix this by performing a scan before scheduling, which notices which
- instructions need to have their operands fetched from the constant table
- and builds the table.
+ if (attrp)
+ {
+ attrp[0] = best_right;
+ attrp[1] = best_len;
+ }
+ return best;
+}
- The algorithm is:
+/* This is used in length attributes of the unnamed instructions
+ corresponding to shl_and_kind return values of 1 and 2. */
+int
+shl_and_length (insn)
+ rtx insn;
+{
+ rtx set_src, left_rtx, mask_rtx;
+ int attributes[3];
+
+ set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ left_rtx = XEXP (XEXP (set_src, 0), 1);
+ mask_rtx = XEXP (set_src, 1);
+ shl_and_kind (left_rtx, mask_rtx, attributes);
+ return attributes[1];
+}
- scan, find an instruction which needs a pcrel move. Look forward, find the
+/* This is used in length attribute of the and_shl_scratch instruction. */
+
+int
+shl_and_scr_length (insn)
+ rtx insn;
+{
+ rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ int len = shift_insns[INTVAL (XEXP (set_src, 1))];
+ rtx op = XEXP (set_src, 0);
+ len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
+ op = XEXP (XEXP (op, 0), 0);
+ return len + shift_insns[INTVAL (XEXP (op, 1))];
+}
+
+/* Generating rtl? */
+extern int rtx_equal_function_value_matters;
+
+/* Generate rtl for instructions for which shl_and_kind advised a particular
+ method of generating them, i.e. returned zero. */
+
+int
+gen_shl_and (dest, left_rtx, mask_rtx, source)
+ rtx dest, left_rtx, mask_rtx, source;
+{
+ int attributes[3];
+ unsigned HOST_WIDE_INT mask;
+ int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
+ int right, total_shift;
+ int (*shift_gen_fun) PROTO((int, rtx*)) = gen_shifty_hi_op;
+
+ right = attributes[0];
+ total_shift = INTVAL (left_rtx) + right;
+ mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
+ switch (kind)
+ {
+ default:
+ return -1;
+ case 1:
+ {
+ int first = attributes[2];
+ rtx operands[3];
+
+ if (first < 0)
+ {
+ emit_insn ((mask << right) <= 0xff
+ ? gen_zero_extendqisi2(dest,
+ gen_lowpart (QImode, source))
+ : gen_zero_extendhisi2(dest,
+ gen_lowpart (HImode, source)));
+ source = dest;
+ }
+ if (source != dest)
+ emit_insn (gen_movsi (dest, source));
+ operands[0] = dest;
+ if (right)
+ {
+ operands[2] = GEN_INT (right);
+ gen_shifty_hi_op (LSHIFTRT, operands);
+ }
+ if (first > 0)
+ {
+ operands[2] = GEN_INT (first);
+ gen_shifty_hi_op (ASHIFT, operands);
+ total_shift -= first;
+ mask <<= first;
+ }
+ if (first >= 0)
+ emit_insn (mask <= 0xff
+ ? gen_zero_extendqisi2(dest, gen_lowpart (QImode, dest))
+ : gen_zero_extendhisi2(dest, gen_lowpart (HImode, dest)));
+ if (total_shift > 0)
+ {
+ operands[2] = GEN_INT (total_shift);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ break;
+ }
+ case 4:
+ shift_gen_fun = gen_shifty_op;
+ case 3:
+ /* If the topmost bit that matters is set, set the topmost bits
+ that don't matter. This way, we might be able to get a shorter
+ signed constant. */
+ if (mask & ((HOST_WIDE_INT)1 << 31 - total_shift))
+ mask |= (HOST_WIDE_INT)~0 << (31 - total_shift);
+ case 2:
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (rtx_equal_function_value_matters
+ || reload_in_progress || reload_completed)
+ {
+ rtx operands[3];
+
+ /* Cases 3 and 4 should be handled by this split
+ only while combining */
+ if (kind > 2)
+ abort ();
+ if (right)
+ {
+ emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
+ source = dest;
+ }
+ emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
+ if (total_shift)
+ {
+ operands[0] = dest;
+ operands[1] = dest;
+ operands[2] = GEN_INT (total_shift);
+ shift_gen_fun (ASHIFT, operands);
+ }
+ break;
+ }
+ else
+ {
+ int neg = 0;
+ if (kind != 4 && total_shift < 16)
+ {
+ neg = -ext_shift_amounts[total_shift][1];
+ if (neg > 0)
+ neg -= ext_shift_amounts[total_shift][2];
+ else
+ neg = 0;
+ }
+ emit_insn (gen_and_shl_scratch (dest, source,
+ GEN_INT (right),
+ GEN_INT (mask),
+ GEN_INT (total_shift + neg),
+ GEN_INT (neg)));
+ emit_insn (gen_movsi (dest, dest));
+ break;
+ }
+ }
+ return 0;
+}
+
+/* Try to find a good way to implement the combiner pattern
+ [(set (match_operand:SI 0 "register_operand" "=r")
+ (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
+ (match_operand:SI 2 "const_int_operand" "n")
+ (match_operand:SI 3 "const_int_operand" "n")
+ (const_int 0)))
+ (clobber (reg:SI 18))]
+ LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
+ return 0 for simple left / right shift combination.
+ return 1 for left shift / 8 bit sign extend / left shift.
+ return 2 for left shift / 16 bit sign extend / left shift.
+ return 3 for left shift / 8 bit sign extend / shift / sign extend.
+ return 4 for left shift / 16 bit sign extend / shift / sign extend.
+ return 5 for left shift / 16 bit sign extend / right shift
+ return 6 for < 8 bit sign extend / left shift.
+ return 7 for < 8 bit sign extend / left shift / single right shift.
+ If COSTP is nonzero, assign the calculated cost to *COSTP. */
+
+int
+shl_sext_kind (left_rtx, size_rtx, costp)
+ rtx left_rtx, size_rtx;
+ int *costp;
+{
+ int left, size, insize, ext;
+ int cost, best_cost;
+ int kind;
+
+ left = INTVAL (left_rtx);
+ size = INTVAL (size_rtx);
+ insize = size - left;
+ if (insize <= 0)
+ abort ();
+ /* Default to left / right shift. */
+ kind = 0;
+ best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
+ if (size <= 16)
+ {
+ /* 16 bit shift / sign extend / 16 bit shift */
+ cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
+ /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
+ below, by alternative 3 or something even better. */
+ if (cost < best_cost)
+ {
+ kind = 5;
+ best_cost = cost;
+ }
+ }
+ /* Try a plain sign extend between two shifts. */
+ for (ext = 16; ext >= insize; ext -= 8)
+ {
+ if (ext <= size)
+ {
+ cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
+ if (cost < best_cost)
+ {
+ kind = ext / 8U;
+ best_cost = cost;
+ }
+ }
+ /* Check if we can do a sloppy shift with a final signed shift
+ restoring the sign. */
+ if (EXT_SHIFT_SIGNED (size - ext))
+ cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
+ /* If not, maybe it's still cheaper to do the second shift sloppy,
+ and do a final sign extend? */
+ else if (size <= 16)
+ cost = ext_shift_insns[ext - insize] + 1
+ + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
+ else
+ continue;
+ if (cost < best_cost)
+ {
+ kind = ext / 8U + 2;
+ best_cost = cost;
+ }
+ }
+ /* Check if we can sign extend in r0 */
+ if (insize < 8)
+ {
+ cost = 3 + shift_insns[left];
+ if (cost < best_cost)
+ {
+ kind = 6;
+ best_cost = cost;
+ }
+ /* Try the same with a final signed shift. */
+ if (left < 31)
+ {
+ cost = 3 + ext_shift_insns[left + 1] + 1;
+ if (cost < best_cost)
+ {
+ kind = 7;
+ best_cost = cost;
+ }
+ }
+ }
+ if (TARGET_SH3)
+ {
+ /* Try to use a dynamic shift. */
+ cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
+ if (cost < best_cost)
+ {
+ kind = 0;
+ best_cost = cost;
+ }
+ }
+ if (costp)
+ *costp = cost;
+ return kind;
+}
+
+/* Function to be used in the length attribute of the instructions
+ implementing this pattern. */
+
+int
+shl_sext_length (insn)
+ rtx insn;
+{
+ rtx set_src, left_rtx, size_rtx;
+ int cost;
+
+ set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+ left_rtx = XEXP (XEXP (set_src, 0), 1);
+ size_rtx = XEXP (set_src, 1);
+ shl_sext_kind (left_rtx, size_rtx, &cost);
+ return cost;
+}
+
+/* Generate rtl for this pattern */
+
+int
+gen_shl_sext (dest, left_rtx, size_rtx, source)
+ rtx dest, left_rtx, size_rtx, source;
+{
+ int kind;
+ int left, size, insize, cost;
+ rtx operands[3];
+
+ kind = shl_sext_kind (left_rtx, size_rtx, &cost);
+ left = INTVAL (left_rtx);
+ size = INTVAL (size_rtx);
+ insize = size - left;
+ switch (kind)
+ {
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ {
+ int ext = kind & 1 ? 8 : 16;
+ int shift2 = size - ext;
+
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (! rtx_equal_function_value_matters
+ && ! reload_in_progress && ! reload_completed)
+ {
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ emit_insn (gen_movsi (dest, source));
+ break;
+ }
+ if (dest != source)
+ emit_insn (gen_movsi (dest, source));
+ operands[0] = dest;
+ if (ext - insize)
+ {
+ operands[2] = GEN_INT (ext - insize);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ emit_insn (kind & 1
+ ? gen_extendqisi2(dest, gen_lowpart (QImode, dest))
+ : gen_extendhisi2(dest, gen_lowpart (HImode, dest)));
+ if (kind <= 2)
+ {
+ if (shift2)
+ {
+ operands[2] = GEN_INT (shift2);
+ gen_shifty_op (ASHIFT, operands);
+ }
+ }
+ else
+ {
+ if (shift2 > 0)
+ {
+ if (EXT_SHIFT_SIGNED (shift2))
+ {
+ operands[2] = GEN_INT (shift2 + 1);
+ gen_shifty_op (ASHIFT, operands);
+ operands[2] = GEN_INT (1);
+ gen_shifty_op (ASHIFTRT, operands);
+ break;
+ }
+ operands[2] = GEN_INT (shift2);
+ gen_shifty_hi_op (ASHIFT, operands);
+ }
+ else if (shift2)
+ {
+ operands[2] = GEN_INT (-shift2);
+ gen_shifty_hi_op (LSHIFTRT, operands);
+ }
+ emit_insn (size <= 8
+ ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
+ : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ }
+ break;
+ }
+ case 5:
+ {
+ int i = 16 - size;
+ if (! rtx_equal_function_value_matters
+ && ! reload_in_progress && ! reload_completed)
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ else
+ {
+ operands[0] = dest;
+ operands[2] = GEN_INT (16 - insize);
+ gen_shifty_hi_op (ASHIFT, operands);
+ emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
+ }
+ /* Don't use gen_ashrsi3 because it generates new pseudos. */
+ while (--i >= 0)
+ gen_ashift (ASHIFTRT, 1, dest);
+ break;
+ }
+ case 6:
+ case 7:
+ /* Don't expand fine-grained when combining, because that will
+ make the pattern fail. */
+ if (! rtx_equal_function_value_matters
+ && ! reload_in_progress && ! reload_completed)
+ {
+ emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
+ emit_insn (gen_movsi (dest, source));
+ break;
+ }
+ emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
+ emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
+ emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
+ operands[0] = dest;
+ operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
+ gen_shifty_op (ASHIFT, operands);
+ if (kind == 7)
+ emit_insn (gen_ashrsi3_k (dest, dest, GEN_INT (1)));
+ break;
+ default:
+ return -1;
+ }
+ return 0;
+}
+\f
+/* The SH cannot load a large constant into a register, constants have to
+ come from a pc relative load. The reference of a pc relative load
+ instruction must be less than 1k infront of the instruction. This
+ means that we often have to dump a constant inside a function, and
+ generate code to branch around it.
+
+ It is important to minimize this, since the branches will slow things
+ down and make things bigger.
+
+ Worst case code looks like:
+
+ mov.l L1,rn
+ bra L2
+ nop
+ align
+ L1: .long value
+ L2:
+ ..
+
+ mov.l L3,rn
+ bra L4
+ nop
+ align
+ L3: .long value
+ L4:
+ ..
+
+ We fix this by performing a scan before scheduling, which notices which
+ instructions need to have their operands fetched from the constant table
+ and builds the table.
+
+ The algorithm is:
+
+ scan, find an instruction which needs a pcrel move. Look forward, find the
last barrier which is within MAX_COUNT bytes of the requirement.
If there isn't one, make one. Process all the instructions between
the find and the barrier.
/* Add a constant to the pool and return its label. */
static rtx
-add_constant (x, mode)
+add_constant (x, mode, last_value)
+ rtx last_value;
rtx x;
enum machine_mode mode;
{
continue;
}
if (rtx_equal_p (x, pool_vector[i].value))
- return pool_vector[i].label;
+ {
+ lab = 0;
+ if (! last_value
+ || ! i
+ || ! rtx_equal_p (last_value, pool_vector[i-1].value))
+ {
+ lab = pool_vector[i].label;
+ if (! lab)
+ pool_vector[i].label = lab = gen_label_rtx ();
+ }
+ return lab;
+ }
}
}
/* Need a new one. */
pool_vector[pool_size].value = x;
- lab = gen_label_rtx ();
+ if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
+ lab = 0;
+ else
+ lab = gen_label_rtx ();
pool_vector[pool_size].mode = mode;
pool_vector[pool_size].label = lab;
pool_size++;
case HImode:
break;
case SImode:
+ case SFmode:
if (need_align)
{
need_align = 0;
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_4 (), scan);
}
- scan = emit_label_after (p->label, scan);
+ if (p->label)
+ scan = emit_label_after (p->label, scan);
scan = emit_insn_after (gen_consttable_4 (p->value), scan);
break;
+ case DFmode:
case DImode:
if (need_align)
{
scan = emit_label_after (gen_label_rtx (), scan);
scan = emit_insn_after (gen_align_4 (), scan);
}
- scan = emit_label_after (p->label, scan);
+ if (p->label)
+ scan = emit_label_after (p->label, scan);
scan = emit_insn_after (gen_consttable_8 (p->value), scan);
break;
default:
broken_move (insn)
rtx insn;
{
- if (GET_CODE (insn) == INSN
- && GET_CODE (PATTERN (insn)) == SET
- /* We can load any 8 bit value if we don't care what the high
- order bits end up as. */
- && GET_MODE (SET_DEST (PATTERN (insn))) != QImode
- && CONSTANT_P (SET_SRC (PATTERN (insn)))
- && ! (GET_CODE (SET_SRC (PATTERN (insn))) == CONST_DOUBLE
- && (fp_zero_operand (SET_SRC (PATTERN (insn)))
- || fp_one_operand (SET_SRC (PATTERN (insn))))
- && GET_CODE (SET_DEST (PATTERN (insn))) == REG
- && REGNO (SET_DEST (PATTERN (insn))) >= FIRST_FP_REG
- && REGNO (SET_DEST (PATTERN (insn))) <= LAST_FP_REG)
- && (GET_CODE (SET_SRC (PATTERN (insn))) != CONST_INT
- || ! CONST_OK_FOR_I (INTVAL (SET_SRC (PATTERN (insn))))))
- return 1;
+ if (GET_CODE (insn) == INSN)
+ {
+ rtx pat = PATTERN (insn);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET
+ /* We can load any 8 bit value if we don't care what the high
+ order bits end up as. */
+ && GET_MODE (SET_DEST (pat)) != QImode
+ && CONSTANT_P (SET_SRC (pat))
+ && ! (TARGET_SH3E
+ && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
+ && (fp_zero_operand (SET_SRC (pat))
+ || fp_one_operand (SET_SRC (pat)))
+ && GET_CODE (SET_DEST (pat)) == REG
+ && REGNO (SET_DEST (pat)) >= FIRST_FP_REG
+ && REGNO (SET_DEST (pat)) <= LAST_FP_REG)
+ && (GET_CODE (SET_SRC (pat)) != CONST_INT
+ || ! CONST_OK_FOR_I (INTVAL (SET_SRC (pat)))))
+ return 1;
+ }
return 0;
}
+static int
+mova_p (insn)
+ rtx insn;
+{
+ return (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
+ && XINT (SET_SRC (PATTERN (insn)), 1) == 1);
+}
+
/* Find the last barrier from insn FROM which is close enough to hold the
constant pool. If we can't find one, then create one near the end of
the range. */
-/* ??? It would be good to put constant pool tables between a case jump and
- the jump table. This fails for two reasons. First, there is no
- barrier after the case jump. This is a bug in the casesi pattern.
- Second, inserting the table here may break the mova instruction that
- loads the jump table address, by moving the jump table too far away.
- We fix that problem by never outputting the constant pool between a mova
- and its label. */
-
static rtx
-find_barrier (from)
- rtx from;
+find_barrier (num_mova, mova, from)
+ int num_mova;
+ rtx mova, from;
{
int count_si = 0;
int count_hi = 0;
int found_hi = 0;
int found_si = 0;
- rtx found_barrier = 0;
- rtx found_mova = 0;
+ int hi_align = 2;
+ int si_align = 2;
+ int leading_mova = num_mova;
+ rtx barrier_before_mova, found_barrier = 0, good_barrier = 0;
int si_limit;
int hi_limit;
before the table, subtract 2 for the instruction that fills the jump
delay slot. This gives 1018. */
- /* If not optimizing, then it is possible that the jump instruction we add
- won't be shortened, and thus will have a length of 14 instead of 2.
- We must adjust the limits downwards to account for this, giving a limit
- of 1008 for SImode and 500 for HImode. */
+ /* The branch will always be shortened now that the reference address for
+ forward branches is the successor address, thus we need no longer make
+ adjustments to the [sh]i_limit for -O0. */
- if (optimize)
- {
- si_limit = 1018;
- hi_limit = 510;
- }
- else
- {
- si_limit = 1008;
- hi_limit = 500;
- }
+ si_limit = 1018;
+ hi_limit = 510;
while (from && count_si < si_limit && count_hi < hi_limit)
{
int inc = get_attr_length (from);
+ int new_align = 1;
+
+ if (GET_CODE (from) == CODE_LABEL)
+ {
+ if (optimize)
+ new_align = 1 << label_to_alignment (from);
+ else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
+ new_align = 1 << barrier_align (from);
+ else
+ new_align = 1;
+ inc = 0;
+ }
if (GET_CODE (from) == BARRIER)
- found_barrier = from;
+ {
+
+ found_barrier = from;
+
+ /* If we are at the end of the function, or in front of an alignment
+ instruction, we need not insert an extra alignment. We prefer
+ this kind of barrier. */
+ if (barrier_align (from) > 2)
+ good_barrier = from;
+ }
if (broken_move (from))
{
- rtx pat = PATTERN (from);
- rtx src = SET_SRC (pat);
- rtx dst = SET_DEST (pat);
- enum machine_mode mode = GET_MODE (dst);
+ rtx pat, src, dst;
+ enum machine_mode mode;
+
+ pat = PATTERN (from);
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
/* We must explicitly check the mode, because sometimes the
front end will generate code to load unsigned constants into
HImode targets without properly sign extending them. */
- if (mode == HImode || (mode == SImode && hi_const (src)))
+ if (mode == HImode
+ || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
{
- found_hi = 1;
+ found_hi += 2;
/* We put the short constants before the long constants, so
we must count the length of short constants in the range
for the long constants. */
/* ??? This isn't optimal, but is easy to do. */
- if (found_si)
- count_si += 2;
+ si_limit -= 2;
}
else
- found_si = 1;
+ {
+ while (si_align > 2 && found_si + si_align - 2 > count_si)
+ si_align >>= 1;
+ if (found_si > count_si)
+ count_si = found_si;
+ found_si += GET_MODE_SIZE (mode);
+ if (num_mova)
+ si_limit -= GET_MODE_SIZE (mode);
+ }
}
- if (GET_CODE (from) == INSN
- && GET_CODE (PATTERN (from)) == SET
- && GET_CODE (SET_SRC (PATTERN (from))) == UNSPEC
- && XINT (SET_SRC (PATTERN (from)), 1) == 1)
- found_mova = from;
+ if (mova_p (from))
+ {
+ if (! num_mova++)
+ {
+ leading_mova = 0;
+ mova = from;
+ barrier_before_mova = good_barrier ? good_barrier : found_barrier;
+ }
+ if (found_si > count_si)
+ count_si = found_si;
+ }
else if (GET_CODE (from) == JUMP_INSN
&& (GET_CODE (PATTERN (from)) == ADDR_VEC
|| GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
- found_mova = 0;
+ {
+ if (num_mova)
+ num_mova--;
+ if (barrier_align (next_real_insn (from)) == CACHE_LOG)
+ {
+ /* We have just passed the barrier in front of the
+ ADDR_DIFF_VEC, which is stored in found_barrier. Since
+ the ADDR_DIFF_VEC is accessed as data, just like our pool
+ constants, this is a good opportunity to accommodate what
+ we have gathered so far.
+ If we waited any longer, we could end up at a barrier in
+ front of code, which gives worse cache usage for separated
+ instruction / data caches. */
+ good_barrier = found_barrier;
+ break;
+ }
+ else
+ {
+ rtx body = PATTERN (from);
+ inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
+ }
+ }
if (found_si)
- count_si += inc;
+ {
+ if (new_align > si_align)
+ {
+ si_limit -= count_si - 1 & new_align - si_align;
+ si_align = new_align;
+ }
+ count_si = count_si + new_align - 1 & -new_align;
+ count_si += inc;
+ }
if (found_hi)
- count_hi += inc;
+ {
+ if (new_align > hi_align)
+ {
+ hi_limit -= count_hi - 1 & new_align - hi_align;
+ hi_align = new_align;
+ }
+ count_hi = count_hi + new_align - 1 & -new_align;
+ count_hi += inc;
+ }
from = NEXT_INSN (from);
}
- /* Insert the constant pool table before the mova instruction, to prevent
- the mova label reference from going out of range. */
- if (found_mova)
- from = found_mova;
+ if (num_mova)
+ if (leading_mova)
+ {
+ /* Try as we might, the leading mova is out of range. Change
+ it into a load (which will become a pcload) and retry. */
+ SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
+ INSN_CODE (mova) = -1;
+ return find_barrier (0, 0, mova);
+ }
+ else
+ {
+ /* Insert the constant pool table before the mova instruction,
+ to prevent the mova label reference from going out of range. */
+ from = mova;
+ good_barrier = found_barrier = barrier_before_mova;
+ }
- if (! found_barrier)
+ if (found_barrier)
+ {
+ if (good_barrier && next_real_insn (found_barrier))
+ found_barrier = good_barrier;
+ }
+ else
{
/* We didn't find a barrier in time to dump our stuff,
so we'll make one. */
return found_barrier;
}
+/* If the instruction INSN is implemented by a special function, and we can
+ positively find the register that is used to call the sfunc, and this
+ register is not used anywhere else in this instruction - except as the
+ destination of a set, return this register; else, return 0. */
+rtx
+sfunc_uses_reg (insn)
+ rtx insn;
+{
+ int i;
+ rtx pattern, part, reg_part, reg;
+
+ if (GET_CODE (insn) != INSN)
+ return 0;
+ pattern = PATTERN (insn);
+ if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
+ return 0;
+
+ for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
+ reg_part = part;
+ }
+ if (! reg_part)
+ return 0;
+ reg = XEXP (reg_part, 0);
+ for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
+ {
+ part = XVECEXP (pattern, 0, i);
+ if (part == reg_part || GET_CODE (part) == CLOBBER)
+ continue;
+ if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
+ && GET_CODE (SET_DEST (part)) == REG)
+ ? SET_SRC (part) : part)))
+ return 0;
+ }
+ return reg;
+}
+
/* See if the only way in which INSN uses REG is by calling it, or by
setting it while calling it. Set *SET to a SET rtx if the register
is set by INSN. */
rtx insn;
rtx *set;
{
- rtx pattern;
+ rtx pattern, reg2;
*set = NULL_RTX;
+ reg2 = sfunc_uses_reg (insn);
+ if (reg2 && REGNO (reg2) == REGNO (reg))
+ {
+ pattern = single_set (insn);
+ if (pattern
+ && GET_CODE (SET_DEST (pattern)) == REG
+ && REGNO (reg) == REGNO (SET_DEST (pattern)))
+ *set = pattern;
+ return 0;
+ }
if (GET_CODE (insn) != CALL_INSN)
{
/* We don't use rtx_equal_p because we don't care if the mode is
&& GET_CODE (SET_DEST (pattern)) == REG
&& REGNO (reg) == REGNO (SET_DEST (pattern)))
{
+ rtx par, part;
+ int i;
+
*set = pattern;
- return 0;
+ par = PATTERN (insn);
+ if (GET_CODE (par) == PARALLEL)
+ for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
+ {
+ part = XVECEXP (par, 0, i);
+ if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
+ return 1;
+ }
+ return reg_mentioned_p (reg, SET_SRC (pattern));
}
return 1;
return 0;
}
-/* Exported to toplev.c.
-
- Do a final pass over the function, just before delayed branch
- scheduling. */
-
-void
-machine_dependent_reorg (first)
- rtx first;
+/* Given a X, a pattern of an insn or a part of it, return a mask of used
+ general registers. Bits 0..15 mean that the respective registers
+ are used as inputs in the instruction. Bits 16..31 mean that the
+ registers 0..15, respectively, are used as outputs, or are clobbered.
+ IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
+int
+regs_used (x, is_dest)
+ rtx x; int is_dest;
{
- rtx insn;
-
- /* If relaxing, generate pseudo-ops to associate function calls with
- the symbols they call. It does no harm to not generate these
- pseudo-ops. However, when we can generate them, it enables to
- linker to potentially relax the jsr to a bsr, and eliminate the
- register load and, possibly, the constant pool entry. */
+ enum rtx_code code;
+ char *fmt;
+ int i, used = 0;
- if (TARGET_RELAX)
+ if (! x)
+ return used;
+ code = GET_CODE (x);
+ switch (code)
{
- /* Remove all REG_LABEL notes. We want to use them for our own
+ case REG:
+ if (REGNO (x) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (x) + is_dest));
+ return 0;
+ case SUBREG:
+ {
+ rtx y = SUBREG_REG (x);
+
+ if (GET_CODE (y) != REG)
+ break;
+ if (REGNO (y) < 16)
+ return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
+ << (REGNO (y) + SUBREG_WORD (x) + is_dest));
+ return 0;
+ }
+ case SET:
+ return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
+ case RETURN:
+ /* If there was a return value, it must have been indicated with USE. */
+ return 0x00ffff00;
+ case CLOBBER:
+ is_dest = 1;
+ break;
+ case MEM:
+ is_dest = 0;
+ break;
+ case CALL:
+ used |= 0x00ff00f0;
+ break;
+ }
+
+ fmt = GET_RTX_FORMAT (code);
+
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'E')
+ {
+ register int j;
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ used |= regs_used (XVECEXP (x, i, j), is_dest);
+ }
+ else if (fmt[i] == 'e')
+ used |= regs_used (XEXP (x, i), is_dest);
+ }
+ return used;
+}
+
+/* Create an instruction that prevents redirection of a conditional branch
+ to the destination of the JUMP with address ADDR.
+ If the branch needs to be implemented as an indirect jump, try to find
+ a scratch register for it.
+ If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
+ If any preceding insn that doesn't fit into a delay slot is good enough,
+ pass 1. Pass 2 if a definite blocking insn is needed.
+ -1 is used internally to avoid deep recursion.
+ If a blocking instruction is made or recognized, return it. */
+
+static rtx
+gen_block_redirect (jump, addr, need_block)
+ rtx jump;
+ int addr, need_block;
+{
+ int dead = 0;
+ rtx prev = prev_nonnote_insn (jump);
+ rtx dest;
+
+ /* First, check if we already have an instruction that satisfies our need. */
+ if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
+ {
+ if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
+ return prev;
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER
+ || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ prev = jump;
+ else if ((need_block &= ~1) < 0)
+ return prev;
+ else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
+ need_block = 0;
+ }
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ dest = XEXP (SET_SRC (PATTERN (jump)), 0);
+ /* If the branch is out of range, try to find a scratch register for it. */
+ if (optimize
+ && (insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098))
+ {
+ rtx scan;
+ /* Don't look for the stack pointer as a scratch register,
+ it would cause trouble if an interrupt occurred. */
+ unsigned try = 0x7fff, used;
+ int jump_left = flag_expensive_optimizations + 1;
+
+ /* It is likely that the most recent eligible instruction is wanted for
+ the delay slot. Therefore, find out which registers it uses, and
+ try to avoid using them. */
+
+ for (scan = jump; scan = PREV_INSN (scan); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (code == CODE_LABEL || code == JUMP_INSN)
+ break;
+ if (code == INSN
+ && GET_CODE (PATTERN (scan)) != USE
+ && GET_CODE (PATTERN (scan)) != CLOBBER
+ && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
+ {
+ try &= ~regs_used (PATTERN (scan), 0);
+ break;
+ }
+ }
+ for (used = dead = 0, scan = JUMP_LABEL (jump); scan = NEXT_INSN (scan); )
+ {
+ enum rtx_code code;
+
+ if (INSN_DELETED_P (scan))
+ continue;
+ code = GET_CODE (scan);
+ if (GET_RTX_CLASS (code) == 'i')
+ {
+ used |= regs_used (PATTERN (scan), 0);
+ if (code == CALL_INSN)
+ used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
+ dead |= (used >> 16) & ~used;
+ if (dead & try)
+ {
+ dead &= try;
+ break;
+ }
+ if (code == JUMP_INSN)
+ if (jump_left-- && simplejump_p (scan))
+ scan = JUMP_LABEL (scan);
+ else
+ break;
+ }
+ }
+ /* Mask out the stack pointer again, in case it was
+ the only 'free' register we have found. */
+ dead &= 0x7fff;
+ }
+ /* If the immediate destination is still in range, check for possible
+ threading with a jump beyond the delay slot insn.
+ Don't check if we are called recursively; the jump has been or will be
+ checked in a different invocation then. */
+
+ else if (optimize && need_block >= 0)
+ {
+ rtx next = next_active_insn (next_active_insn (dest));
+ if (next && GET_CODE (next) == JUMP_INSN
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump)
+ {
+ dest = JUMP_LABEL (next);
+ if (dest
+ && insn_addresses[INSN_UID (dest)] - addr + 4092U > 4092 + 4098)
+ gen_block_redirect (next, insn_addresses[INSN_UID (next)], -1);
+ }
+ }
+
+ if (dead)
+ {
+ rtx reg = gen_rtx (REG, SImode, exact_log2 (dead & -dead));
+
+ /* It would be nice if we could convert the jump into an indirect
+ jump / far branch right now, and thus exposing all constituent
+ instructions to further optimization. However, reorg uses
+ simplejump_p to determine if there is an unconditional jump where
+ it should try to schedule instructions from the target of the
+ branch; simplejump_p fails for indirect jumps even if they have
+ a JUMP_LABEL. */
+ rtx insn = emit_insn_before (gen_indirect_jump_scratch
+ (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
+ , jump);
+ INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
+ return insn;
+ }
+ else if (need_block)
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ return emit_insn_before (gen_block_branch_redirect
+ (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
+ , jump);
+ return prev;
+}
+
+#define CONDJUMP_MIN -252
+#define CONDJUMP_MAX 262
+struct far_branch
+{
+ /* A label (to be placed) in front of the jump
+ that jumps to our ultimate destination. */
+ rtx near_label;
+ /* Where we are going to insert it if we cannot move the jump any farther,
+ or the jump itself if we have picked up an existing jump. */
+ rtx insert_place;
+ /* The ultimate destination. */
+ rtx far_label;
+ struct far_branch *prev;
+ /* If the branch has already been created, its address;
+ else the address of its first prospective user. */
+ int address;
+};
+
+enum mdep_reorg_phase_e mdep_reorg_phase;
+void
+gen_far_branch (bp)
+ struct far_branch *bp;
+{
+ rtx insn = bp->insert_place;
+ rtx jump;
+ rtx label = gen_label_rtx ();
+
+ emit_label_after (label, insn);
+ if (bp->far_label)
+ {
+ jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ jump = emit_jump_insn_after (gen_return (), insn);
+ /* Emit a barrier so that reorg knows that any following instructions
+ are not reachable via a fall-through path.
+ But don't do this when not optimizing, since we wouldn't supress the
+ alignment for the barrier then, and could end up with out-of-range
+ pc-relative loads. */
+ if (optimize)
+ emit_barrier_after (jump);
+ emit_label_after (bp->near_label, insn);
+ JUMP_LABEL (jump) = bp->far_label;
+ if (! invert_jump (insn, label))
+ abort ();
+ /* Prevent reorg from undoing our splits. */
+ gen_block_redirect (jump, bp->address += 2, 2);
+}
+
+/* Fix up ADDR_DIFF_VECs. */
+void
+fixup_addr_diff_vecs (first)
+ rtx first;
+{
+ rtx insn;
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx vec_lab, pat, prev, prevpat, x, braf_label;
+
+ if (GET_CODE (insn) != JUMP_INSN
+ || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
+ continue;
+ pat = PATTERN (insn);
+ vec_lab = XEXP (XEXP (pat, 0), 0);
+
+ /* Search the matching casesi_jump_2. */
+ for (prev = vec_lab; ; prev = PREV_INSN (prev))
+ {
+ if (GET_CODE (prev) != JUMP_INSN)
+ continue;
+ prevpat = PATTERN (prev);
+ if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
+ continue;
+ x = XVECEXP (prevpat, 0, 1);
+ if (GET_CODE (x) != USE)
+ continue;
+ x = XEXP (x, 0);
+ if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
+ break;
+ }
+
+ /* Emit the reference label of the braf where it belongs, right after
+ the casesi_jump_2 (i.e. braf). */
+ braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
+ emit_label_after (braf_label, prev);
+
+ /* Fix up the ADDR_DIF_VEC to be relative
+ to the reference address of the braf. */
+ XEXP (XEXP (pat, 0), 0) = braf_label;
+ }
+}
+
+/* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
+ a barrier. Return the base 2 logarithm of the desired alignment. */
+int
+barrier_align (barrier_or_label)
+ rtx barrier_or_label;
+{
+ rtx next = next_real_insn (barrier_or_label), pat, prev;
+ int slot, credit;
+
+ if (! next)
+ return 0;
+
+ pat = PATTERN (next);
+
+ if (GET_CODE (pat) == ADDR_DIFF_VEC)
+ return 2;
+
+ if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == 1)
+ /* This is a barrier in front of a constant table. */
+ return 0;
+
+ prev = prev_real_insn (barrier_or_label);
+ if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
+ {
+ pat = PATTERN (prev);
+ /* If this is a very small table, we want to keep the alignment after
+ the table to the minimum for proper code alignment. */
+ return ((TARGET_SMALLCODE
+ || (XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
+ <= 1 << (CACHE_LOG - 2)))
+ ? 1 : CACHE_LOG);
+ }
+
+ if (TARGET_SMALLCODE)
+ return 0;
+
+ if (! TARGET_SH2 || ! optimize)
+ return CACHE_LOG;
+
+ /* When fixing up pcloads, a constant table might be inserted just before
+ the basic block that ends with the barrier. Thus, we can't trust the
+ instruction lengths before that. */
+ if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
+ {
+ /* Check if there is an immediately preceding branch to the insn beyond
+ the barrier. We must weight the cost of discarding useful information
+ from the current cache line when executing this branch and there is
+ an alignment, against that of fetching unneeded insn in front of the
+ branch target when there is no alignment. */
+
+ /* PREV is presumed to be the JUMP_INSN for the barrier under
+ investigation. Skip to the insn before it. */
+ prev = prev_real_insn (prev);
+
+ for (slot = 2, credit = 1 << (CACHE_LOG - 2) + 2;
+ credit >= 0 && prev && GET_CODE (prev) == INSN;
+ prev = prev_real_insn (prev))
+ {
+ if (GET_CODE (PATTERN (prev)) == USE
+ || GET_CODE (PATTERN (prev)) == CLOBBER)
+ continue;
+ if (GET_CODE (PATTERN (prev)) == SEQUENCE)
+ prev = XVECEXP (PATTERN (prev), 0, 1);
+ if (slot &&
+ get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
+ slot = 0;
+ credit -= get_attr_length (prev);
+ }
+ if (prev
+ && GET_CODE (prev) == JUMP_INSN
+ && JUMP_LABEL (prev)
+ && next_real_insn (JUMP_LABEL (prev)) == next_real_insn (barrier_or_label)
+ && (credit - slot >= (GET_CODE (SET_SRC (PATTERN (prev))) == PC ? 2 : 0)))
+ return 0;
+ }
+
+ return CACHE_LOG;
+}
+
+/* Exported to toplev.c.
+
+ Do a final pass over the function, just before delayed branch
+ scheduling. */
+
+void
+machine_dependent_reorg (first)
+ rtx first;
+{
+ rtx insn, mova;
+ int num_mova;
+ rtx r0_rtx = gen_rtx (REG, Pmode, 0);
+ rtx r0_inc_rtx = gen_rtx (POST_INC, Pmode, r0_rtx);
+
+ /* If relaxing, generate pseudo-ops to associate function calls with
+ the symbols they call. It does no harm to not generate these
+ pseudo-ops. However, when we can generate them, it enables to
+ linker to potentially relax the jsr to a bsr, and eliminate the
+ register load and, possibly, the constant pool entry. */
+
+ mdep_reorg_phase = SH_INSERT_USES_LABELS;
+ if (TARGET_RELAX)
+ {
+ /* Remove all REG_LABEL notes. We want to use them for our own
purposes. This works because none of the remaining passes
need to look at them.
rtx pattern, reg, link, set, scan, dies, label;
int rescan = 0, foundinsn = 0;
- if (GET_CODE (insn) != CALL_INSN)
- continue;
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ pattern = PATTERN (insn);
- pattern = PATTERN (insn);
+ if (GET_CODE (pattern) == PARALLEL)
+ pattern = XVECEXP (pattern, 0, 0);
+ if (GET_CODE (pattern) == SET)
+ pattern = SET_SRC (pattern);
- if (GET_CODE (pattern) == PARALLEL)
- pattern = XVECEXP (pattern, 0, 0);
- if (GET_CODE (pattern) == SET)
- pattern = SET_SRC (pattern);
+ if (GET_CODE (pattern) != CALL
+ || GET_CODE (XEXP (pattern, 0)) != MEM)
+ continue;
- if (GET_CODE (pattern) != CALL
- || GET_CODE (XEXP (pattern, 0)) != MEM)
- continue;
+ reg = XEXP (XEXP (pattern, 0), 0);
+ }
+ else
+ {
+ reg = sfunc_uses_reg (insn);
+ if (! reg)
+ continue;
+ }
- reg = XEXP (XEXP (pattern, 0), 0);
if (GET_CODE (reg) != REG)
continue;
for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
{
+ if (REG_NOTE_KIND (link) != 0)
+ continue;
set = single_set (XEXP (link, 0));
if (set && rtx_equal_p (reg, SET_DEST (set)))
{
if (scan == insn)
foundinsn = 1;
- if (scan != insn && GET_CODE (scan) == CALL_INSN)
+ if (scan != insn
+ && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
{
/* There is a function call to this register other
than the one we are checking. If we optimize
scan = link;
do
{
+ rtx reg2;
+
scan = NEXT_INSN (scan);
if (scan != insn
- && GET_CODE (scan) == CALL_INSN
- && reg_mentioned_p (reg, scan))
+ && ((GET_CODE (scan) == CALL_INSN
+ && reg_mentioned_p (reg, scan))
+ || ((reg2 = sfunc_uses_reg (scan))
+ && REGNO (reg2) == REGNO (reg))))
REG_NOTES (scan) = gen_rtx (EXPR_LIST, REG_LABEL,
label, REG_NOTES (scan));
}
}
}
+ if (TARGET_SH2)
+ fixup_addr_diff_vecs (first);
+
+ if (optimize)
+ {
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
+ shorten_branches (first);
+ }
/* Scan the function looking for move instructions which have to be
changed to pc-relative loads and insert the literal tables. */
- for (insn = first; insn; insn = NEXT_INSN (insn))
+ mdep_reorg_phase = SH_FIXUP_PCLOAD;
+ for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
{
+ if (mova_p (insn))
+ {
+ if (! num_mova++)
+ mova = insn;
+ }
+ else if (GET_CODE (insn) == JUMP_INSN
+ && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
+ && num_mova)
+ {
+ rtx scan;
+ int total;
+
+ num_mova--;
+
+ /* Some code might have been inserted between the mova and
+ its ADDR_DIFF_VEC. Check if the mova is still in range. */
+ for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
+ total += get_attr_length (scan);
+
+ /* range of mova is 1020, add 4 because pc counts from address of
+ second instruction after this one, subtract 2 in case pc is 2
+ byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
+ cancels out with alignment effects of the mova itself. */
+ if (total > 1022)
+ {
+ /* Change the mova into a load, and restart scanning
+ there. broken_move will then return true for mova. */
+ SET_SRC (PATTERN (mova))
+ = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
+ INSN_CODE (mova) = -1;
+ insn = mova;
+ }
+ }
if (broken_move (insn))
{
rtx scan;
/* Scan ahead looking for a barrier to stick the constant table
behind. */
- rtx barrier = find_barrier (insn);
+ rtx barrier = find_barrier (num_mova, mova, insn);
+ rtx last_float_move, last_float = 0, *last_float_addr;
+ if (num_mova && ! mova_p (mova))
+ {
+ /* find_barrier had to change the first mova into a
+ pcload; thus, we have to start with this new pcload. */
+ insn = mova;
+ num_mova = 0;
+ }
/* Now find all the moves between the points and modify them. */
for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
{
+ if (GET_CODE (scan) == CODE_LABEL)
+ last_float = 0;
if (broken_move (scan))
{
- rtx pat = PATTERN (scan);
- rtx src = SET_SRC (pat);
- rtx dst = SET_DEST (pat);
- enum machine_mode mode = GET_MODE (dst);
+ rtx *patp = &PATTERN (scan), pat = *patp;
+ rtx src, dst;
rtx lab;
rtx newinsn;
rtx newsrc;
+ enum machine_mode mode;
+
+ if (GET_CODE (pat) == PARALLEL)
+ patp = &XVECEXP (pat, 0, 0), pat = *patp;
+ src = SET_SRC (pat);
+ dst = SET_DEST (pat);
+ mode = GET_MODE (dst);
- if (mode == SImode && hi_const (src))
+ if (mode == SImode && hi_const (src)
+ && REGNO (dst) != FPUL_REG)
{
int offset = 0;
dst = gen_rtx (REG, HImode, REGNO (dst) + offset);
}
- lab = add_constant (src, mode);
- newsrc = gen_rtx (MEM, mode,
- gen_rtx (LABEL_REF, VOIDmode, lab));
- RTX_UNCHANGING_P (newsrc) = 1;
- newinsn = emit_insn_after (gen_rtx (SET, VOIDmode,
- dst, newsrc), scan);
- REG_NOTES (newinsn) = REG_NOTES (scan);
- REG_NOTES (scan) = NULL_RTX;
- /* If not optimizing, then delete_insn doesn't remove the
- insn from the chain, and hence is not useful. We
- convert the instruction to a NOTE in that case. */
- if (optimize)
- delete_insn (scan);
+ if (GET_CODE (dst) == REG
+ && ((REGNO (dst) >= FIRST_FP_REG
+ && REGNO (dst) <= LAST_XD_REG)
+ || REGNO (dst) == FPUL_REG))
+ {
+ /* This must be an insn that clobbers r0. */
+ rtx clobber = XVECEXP (PATTERN (scan), 0,
+ XVECLEN (PATTERN (scan), 0) - 1);
+
+ if (GET_CODE (clobber) != CLOBBER
+ || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
+ abort ();
+
+ if (last_float
+ && reg_set_between_p (r0_rtx, last_float_move, scan))
+ last_float = 0;
+ lab = add_constant (src, mode, last_float);
+ if (lab)
+ emit_insn_before (gen_mova (lab), scan);
+ else
+ {
+ /* There will be a REG_UNUSED note for r0 on
+ LAST_FLOAT_MOVE; we have to change it to REG_INC,
+ lest reorg:mark_target_live_regs will not
+ consider r0 to be used, and we end up with delay
+ slot insn in front of SCAN that clobber r0. */
+ rtx note
+ = find_regno_note (last_float_move, REG_UNUSED, 0);
+
+ /* If we are not optimizing, then there may not be
+ a note. */
+ if (note)
+ PUT_MODE (note, REG_INC);
+
+ *last_float_addr = r0_inc_rtx;
+ }
+ last_float_move = scan;
+ last_float = src;
+ newsrc = gen_rtx (MEM, mode,
+ ((TARGET_SH4 && ! TARGET_FMOVD
+ || REGNO (dst) == FPUL_REG)
+ ? r0_inc_rtx
+ : r0_rtx));
+ last_float_addr = &XEXP (newsrc, 0);
+
+ /* Remove the clobber of r0. */
+ XEXP (clobber, 0) = gen_rtx_SCRATCH (Pmode);
+ }
else
{
- PUT_CODE (scan, NOTE);
- NOTE_LINE_NUMBER (scan) = NOTE_INSN_DELETED;
- NOTE_SOURCE_FILE (insn) = 0;
+ lab = add_constant (src, mode, 0);
+ newsrc = gen_rtx (MEM, mode,
+ gen_rtx (LABEL_REF, VOIDmode, lab));
}
- scan = newinsn;
+ RTX_UNCHANGING_P (newsrc) = 1;
+ *patp = gen_rtx (SET, VOIDmode, dst, newsrc);
+ INSN_CODE (scan) = -1;
}
}
dump_table (barrier);
+ insn = barrier;
+ }
+ }
+
+ mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
+ insn_addresses = 0;
+ split_branches (first);
+
+ /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
+ also has an effect on the register that holds the addres of the sfunc.
+ Insert an extra dummy insn in front of each sfunc that pretends to
+ use this register. */
+ if (flag_delayed_branch)
+ {
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ {
+ rtx reg = sfunc_uses_reg (insn);
+
+ if (! reg)
+ continue;
+ emit_insn_before (gen_use_sfunc_addr (reg), insn);
}
}
+#if 0
+ /* fpscr is not actually a user variable, but we pretend it is for the
+ sake of the previous optimization passes, since we want it handled like
+ one. However, we don't have eny debugging information for it, so turn
+ it into a non-user variable now. */
+ if (TARGET_SH4)
+ REG_USERVAR_P (get_fpscr_rtx ()) = 0;
+#endif
+ if (optimize)
+ sh_flag_remove_dead_before_cse = 1;
+ mdep_reorg_phase = SH_AFTER_MDEP_REORG;
+}
+
+int
+get_dest_uid (label, max_uid)
+ rtx label;
+ int max_uid;
+{
+ rtx dest = next_real_insn (label);
+ int dest_uid;
+ if (! dest)
+ /* This can happen for an undefined label. */
+ return 0;
+ dest_uid = INSN_UID (dest);
+ /* If this is a newly created branch redirection blocking instruction,
+ we cannot index the branch_uid or insn_addresses arrays with its
+ uid. But then, we won't need to, because the actual destination is
+ the following branch. */
+ while (dest_uid >= max_uid)
+ {
+ dest = NEXT_INSN (dest);
+ dest_uid = INSN_UID (dest);
+ }
+ if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
+ return 0;
+ return dest_uid;
+}
+
+/* Split condbranches that are out of range. Also add clobbers for
+ scratch registers that are needed in far jumps.
+ We do this before delay slot scheduling, so that it can take our
+ newly created instructions into account. It also allows us to
+ find branches with common targets more easily. */
+
+static void
+split_branches (first)
+ rtx first;
+{
+ rtx insn;
+ struct far_branch **uid_branch, *far_branch_list = 0;
+ int max_uid = get_max_uid ();
+
+ /* Find out which branches are out of range. */
+ shorten_branches (first);
+
+ uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
+ bzero ((char *) uid_branch, max_uid * sizeof *uid_branch);
+
+ for (insn = first; insn; insn = NEXT_INSN (insn))
+ if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+ continue;
+ else if (INSN_DELETED_P (insn))
+ {
+ /* Shorten_branches would split this instruction again,
+ so transform it into a note. */
+ PUT_CODE (insn, NOTE);
+ NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (insn) = 0;
+ }
+ else if (GET_CODE (insn) == JUMP_INSN
+ /* Don't mess with ADDR_DIFF_VEC */
+ && (GET_CODE (PATTERN (insn)) == SET
+ || GET_CODE (PATTERN (insn)) == RETURN))
+ {
+ enum attr_type type = get_attr_type (insn);
+ if (type == TYPE_CBRANCH)
+ {
+ rtx next, beyond;
+
+ if (get_attr_length (insn) > 4)
+ {
+ rtx src = SET_SRC (PATTERN (insn));
+ rtx cond = XEXP (src, 0);
+ rtx olabel = XEXP (XEXP (src, 1), 0);
+ rtx jump;
+ int addr = insn_addresses[INSN_UID (insn)];
+ rtx label = 0;
+ int dest_uid = get_dest_uid (olabel, max_uid);
+ struct far_branch *bp = uid_branch[dest_uid];
+
+ /* redirect_jump needs a valid JUMP_LABEL, and it might delete
+ the label if the LABEL_NUSES count drops to zero. There is
+ always a jump_optimize pass that sets these values, but it
+ proceeds to delete unreferenced code, and then if not
+ optimizing, to un-delete the deleted instructions, thus
+ leaving labels with too low uses counts. */
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = olabel;
+ LABEL_NUSES (olabel)++;
+ }
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->far_label
+ = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
+ LABEL_NUSES (bp->far_label)++;
+ }
+ else
+ {
+ label = bp->near_label;
+ if (! label && bp->address - addr >= CONDJUMP_MIN)
+ {
+ rtx block = bp->insert_place;
+
+ if (GET_CODE (PATTERN (block)) == RETURN)
+ block = PREV_INSN (block);
+ else
+ block = gen_block_redirect (block,
+ bp->address, 2);
+ label = emit_label_after (gen_label_rtx (),
+ PREV_INSN (block));
+ bp->near_label = label;
+ }
+ else if (label && ! NEXT_INSN (label))
+ if (addr + 2 - bp->address <= CONDJUMP_MAX)
+ bp->insert_place = insn;
+ else
+ gen_far_branch (bp);
+ }
+ if (! label
+ || NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN)
+ {
+ bp->near_label = label = gen_label_rtx ();
+ bp->insert_place = insn;
+ bp->address = addr;
+ }
+ if (! redirect_jump (insn, label))
+ abort ();
+ }
+ else
+ {
+ /* get_attr_length (insn) == 2 */
+ /* Check if we have a pattern where reorg wants to redirect
+ the branch to a label from an unconditional branch that
+ is too far away. */
+ /* We can't use JUMP_LABEL here because it might be undefined
+ when not optimizing. */
+ /* A syntax error might cause beyond to be NULL_RTX. */
+ beyond
+ = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
+ 0));
+
+ if (beyond
+ && (GET_CODE (beyond) == JUMP_INSN
+ || (GET_CODE (beyond = next_active_insn (beyond))
+ == JUMP_INSN))
+ && GET_CODE (PATTERN (beyond)) == SET
+ && recog_memoized (beyond) == CODE_FOR_jump
+ && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0))]
+ - insn_addresses[INSN_UID (insn)] + 252U)
+ > 252 + 258 + 2))
+ gen_block_redirect (beyond,
+ insn_addresses[INSN_UID (beyond)], 1);
+ }
+
+ next = next_active_insn (insn);
+
+ if ((GET_CODE (next) == JUMP_INSN
+ || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
+ && GET_CODE (PATTERN (next)) == SET
+ && recog_memoized (next) == CODE_FOR_jump
+ && ((insn_addresses[INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0))]
+ - insn_addresses[INSN_UID (insn)] + 252U)
+ > 252 + 258 + 2))
+ gen_block_redirect (next, insn_addresses[INSN_UID (next)], 1);
+ }
+ else if (type == TYPE_JUMP || type == TYPE_RETURN)
+ {
+ int addr = insn_addresses[INSN_UID (insn)];
+ rtx far_label = 0;
+ int dest_uid = 0;
+ struct far_branch *bp;
+
+ if (type == TYPE_JUMP)
+ {
+ far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
+ dest_uid = get_dest_uid (far_label, max_uid);
+ if (! dest_uid)
+ {
+ /* Parse errors can lead to labels outside
+ the insn stream. */
+ if (! NEXT_INSN (far_label))
+ continue;
+
+ if (! optimize)
+ {
+ JUMP_LABEL (insn) = far_label;
+ LABEL_NUSES (far_label)++;
+ }
+ redirect_jump (insn, NULL_RTX);
+ far_label = 0;
+ }
+ }
+ bp = uid_branch[dest_uid];
+ if (! bp)
+ {
+ bp = (struct far_branch *) alloca (sizeof *bp);
+ uid_branch[dest_uid] = bp;
+ bp->prev = far_branch_list;
+ far_branch_list = bp;
+ bp->near_label = 0;
+ bp->far_label = far_label;
+ if (far_label)
+ LABEL_NUSES (far_label)++;
+ }
+ else if (bp->near_label && ! NEXT_INSN (bp->near_label))
+ if (addr - bp->address <= CONDJUMP_MAX)
+ emit_label_after (bp->near_label, PREV_INSN (insn));
+ else
+ {
+ gen_far_branch (bp);
+ bp->near_label = 0;
+ }
+ else
+ bp->near_label = 0;
+ bp->address = addr;
+ bp->insert_place = insn;
+ if (! far_label)
+ emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
+ else
+ gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
+ }
+ }
+ /* Generate all pending far branches,
+ and free our references to the far labels. */
+ while (far_branch_list)
+ {
+ if (far_branch_list->near_label
+ && ! NEXT_INSN (far_branch_list->near_label))
+ gen_far_branch (far_branch_list);
+ if (optimize
+ && far_branch_list->far_label
+ && ! --LABEL_NUSES (far_branch_list->far_label))
+ delete_insn (far_branch_list->far_label);
+ far_branch_list = far_branch_list->prev;
+ }
+
+ /* Instruction length information is no longer valid due to the new
+ instructions that have been generated. */
+ init_insn_lengths ();
}
/* Dump out instruction addresses, which is useful for debugging the
pattern = XVECEXP (pattern, 0, 0);
if (GET_CODE (pattern) == CALL
|| (GET_CODE (pattern) == SET
- && GET_CODE (SET_SRC (pattern)) == CALL))
- fprintf (asm_out_file, "\t.uses L%d\n",
- CODE_LABEL_NUMBER (XEXP (note, 0)));
+ && (GET_CODE (SET_SRC (pattern)) == CALL
+ || get_attr_type (insn) == TYPE_SFUNC)))
+ asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
+ CODE_LABEL_NUMBER (XEXP (note, 0)));
else if (GET_CODE (pattern) == SET)
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "L",
CODE_LABEL_NUMBER (XEXP (note, 0)));
}
/* Dump out any constants accumulated in the final pass. These will
- will only be labels. */
+ only be labels. */
char *
output_jump_label_table ()
static int extra_push;
-/* Adjust the stack and return the number of bytes taken to do it. */
+/* Adjust the stack by SIZE bytes. REG holds the rtl of the register
+ to be adjusted, and TEMP, if nonnegative, holds the register number
+ of a general register that we may clobber. */
static void
-output_stack_adjust (size, reg)
+output_stack_adjust (size, reg, temp)
int size;
rtx reg;
+ int temp;
{
if (size)
{
- rtx val = GEN_INT (size);
- rtx insn;
-
- if (! CONST_OK_FOR_I (size))
+ if (CONST_OK_FOR_I (size))
+ emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
+ /* Try to do it with two partial adjustments; however, we must make
+ sure that the stack is properly aligned at all times, in case
+ an interrupt occurs between the two partial adjustments. */
+ else if (CONST_OK_FOR_I (size / 2 & -4)
+ && CONST_OK_FOR_I (size - (size / 2 & -4)))
{
- rtx reg = gen_rtx (REG, SImode, 3);
- emit_insn (gen_movsi (reg, val));
- val = reg;
+ emit_insn (gen_addsi3 (reg, reg, GEN_INT (size / 2 & -4)));
+ emit_insn (gen_addsi3 (reg, reg, GEN_INT (size - (size / 2 & -4))));
}
+ else
+ {
+ rtx const_reg;
+
+ /* If TEMP is invalid, we could temporarily save a general
+ register to MACL. However, there is currently no need
+ to handle this case, so just abort when we see it. */
+ if (temp < 0)
+ abort ();
+ const_reg = gen_rtx (REG, SImode, temp);
- insn = gen_addsi3 (reg, reg, val);
- emit_insn (insn);
+ /* If SIZE is negative, subtract the positive value.
+ This sometimes allows a constant pool entry to be shared
+ between prologue and epilogue code. */
+ if (size < 0)
+ {
+ emit_insn (gen_movsi (const_reg, GEN_INT (-size)));
+ emit_insn (gen_subsi3 (reg, reg, const_reg));
+ }
+ else
+ {
+ emit_insn (gen_movsi (const_reg, GEN_INT (size)));
+ emit_insn (gen_addsi3 (reg, reg, const_reg));
+ }
+ }
}
}
int rn;
{
rtx x;
- if ((rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
- || rn == FPUL_REG)
- x = emit_insn (gen_push_e (gen_rtx (REG, SFmode, rn)));
+ if (rn == FPUL_REG)
+ x = gen_push_fpul ();
+ else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
+ && rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
+ {
+ if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
+ return;
+ x = gen_push_4 (gen_rtx (REG, DFmode, rn));
+ }
+ else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
+ x = gen_push_e (gen_rtx (REG, SFmode, rn));
else
- x = emit_insn (gen_push (gen_rtx (REG, SImode, rn)));
+ x = gen_push (gen_rtx (REG, SImode, rn));
+ x = emit_insn (x);
REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
}
int rn;
{
rtx x;
- if ((rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
- || rn == FPUL_REG)
- x = emit_insn (gen_pop_e (gen_rtx (REG, SFmode, rn)));
+ if (rn == FPUL_REG)
+ x = gen_pop_fpul ();
+ else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
+ && rn >= FIRST_FP_REG && rn <= LAST_XD_REG)
+ {
+ if ((rn - FIRST_FP_REG) & 1 && rn <= LAST_FP_REG)
+ return;
+ x = gen_pop_4 (gen_rtx (REG, DFmode, rn));
+ }
+ else if (TARGET_SH3E && rn >= FIRST_FP_REG && rn <= LAST_FP_REG)
+ x = gen_pop_e (gen_rtx (REG, SFmode, rn));
else
- x = emit_insn (gen_pop (gen_rtx (REG, SImode, rn)));
+ x = gen_pop (gen_rtx (REG, SImode, rn));
+ x = emit_insn (x);
REG_NOTES (x) = gen_rtx (EXPR_LIST, REG_INC,
gen_rtx(REG, SImode, STACK_POINTER_REGNUM), 0);
}
-/* Generate code to push the regs specified in the mask, and return
- the number of bytes the insns take. */
+/* Generate code to push the regs specified in the mask. */
static void
push_regs (mask, mask2)
{
int i;
+ /* Push PR last; this gives better latencies after the prologue, and
+ candidates for the return delay slot when there are no general
+ registers pushed. */
for (i = 0; i < 32; i++)
- if (mask & (1 << i))
+ if (mask & (1 << i) && i != PR_REG)
push (i);
for (i = 32; i < FIRST_PSEUDO_REGISTER; i++)
if (mask2 & (1 << (i - 32)))
push (i);
+ if (mask & (1 << PR_REG))
+ push (PR_REG);
}
/* Work out the registers which need to be saved, both as a mask and a
- count.
+ count of saved words.
If doing a pragma interrupt function, then push all regs used by the
function, and if we call another function (we can tell by looking at PR),
{
int reg;
int live_regs_mask = 0;
- int count = 0;
+ int count;
+ int interrupt_handler;
+
+ if ((lookup_attribute
+ ("interrupt_handler",
+ DECL_MACHINE_ATTRIBUTES (current_function_decl)))
+ != NULL_TREE)
+ interrupt_handler = 1;
+ else
+ interrupt_handler = 0;
*live_regs_mask2 = 0;
- for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
- {
- if (pragma_interrupt && ! pragma_trapa)
+ /* If we can save a lot of saves by switching to double mode, do that. */
+ if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
+ for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
+ if (regs_ever_live[reg] && regs_ever_live[reg+1]
+ && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
+ && ++count > 2)
{
- /* Need to save all the regs ever live. */
- if ((regs_ever_live[reg]
- || (call_used_regs[reg] && regs_ever_live[PR_REG]))
- && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
- && reg != T_REG && reg != GBR_REG)
- {
- if (reg >= 32)
- *live_regs_mask2 |= 1 << (reg - 32);
- else
- live_regs_mask |= 1 << reg;
- count++;
- }
+ target_flags &= ~FPU_SINGLE_BIT;
+ break;
}
- else
+ for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
+ {
+ if ((interrupt_handler && ! pragma_trapa)
+ ? (/* Need to save all the regs ever live. */
+ (regs_ever_live[reg]
+ || (call_used_regs[reg]
+ && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
+ && regs_ever_live[PR_REG]))
+ && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
+ && reg != RETURN_ADDRESS_POINTER_REGNUM
+ && reg != T_REG && reg != GBR_REG && reg != FPSCR_REG)
+ : (/* Only push those regs which are used and need to be saved. */
+ regs_ever_live[reg] && ! call_used_regs[reg]))
{
- /* Only push those regs which are used and need to be saved. */
- if (regs_ever_live[reg] && ! call_used_regs[reg])
- {
- if (reg >= 32)
- *live_regs_mask2 |= 1 << (reg - 32);
- else
- live_regs_mask |= (1 << reg);
- count++;
- }
+ if (reg >= 32)
+ *live_regs_mask2 |= 1 << (reg - 32);
+ else
+ live_regs_mask |= 1 << reg;
+ count++;
+ if (TARGET_SH4 && TARGET_FMOVD && reg >= FIRST_FP_REG)
+ if (reg <= LAST_FP_REG)
+ {
+ if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
+ {
+ if (reg >= 32)
+ *live_regs_mask2 |= 1 << ((reg ^ 1) - 32);
+ else
+ live_regs_mask |= 1 << (reg ^ 1);
+ count++;
+ }
+ }
+ else if (reg <= LAST_XD_REG)
+ {
+ /* Must switch to double mode to access these registers. */
+ target_flags &= ~FPU_SINGLE_BIT;
+ count++;
+ }
}
}
int live_regs_mask;
int d, i;
int live_regs_mask2;
- live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
+ int save_flags = target_flags;
+ int double_align = 0;
/* We have pretend args if we had an object sent partially in registers
and partially on the stack, e.g. a large structure. */
- output_stack_adjust (-current_function_pretend_args_size, stack_pointer_rtx);
+ output_stack_adjust (-current_function_pretend_args_size,
+ stack_pointer_rtx, 3);
extra_push = 0;
current_function_anonymous_args = 0;
/* This is not used by the SH3E calling convention */
- if (!TARGET_SH3E)
- {
+ if (! TARGET_SH3E && ! TARGET_HITACHI)
+ {
/* Push arg regs as if they'd been provided by caller in stack. */
for (i = 0; i < NPARM_REGS(SImode); i++)
{
int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
- if (i > (NPARM_REGS(SImode)
- - current_function_args_info.arg_count[(int) SH_ARG_INT]
- - current_function_varargs))
+ if (i >= (NPARM_REGS(SImode)
+ - current_function_args_info.arg_count[(int) SH_ARG_INT]
+ ))
break;
push (rn);
extra_push += 4;
}
- }
+ }
}
+ /* If we're supposed to switch stacks at function entry, do so now. */
+ if (sp_switch)
+ emit_insn (gen_sp_switch_1 ());
+
+ live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
+ /* ??? Maybe we could save some switching if we can move a mode switch
+ that already happens to be at the function start into the prologue. */
+ if (target_flags != save_flags)
+ emit_insn (gen_toggle_sz ());
push_regs (live_regs_mask, live_regs_mask2);
+ if (target_flags != save_flags)
+ emit_insn (gen_toggle_sz ());
- output_stack_adjust (-get_frame_size (), stack_pointer_rtx);
+ if (TARGET_ALIGN_DOUBLE && d & 1)
+ double_align = 4;
+
+ target_flags = save_flags;
+
+ output_stack_adjust (-get_frame_size () - double_align,
+ stack_pointer_rtx, 3);
if (frame_pointer_needed)
emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
int d, i;
int live_regs_mask2;
+ int save_flags = target_flags;
+ int frame_size = get_frame_size ();
+
live_regs_mask = calc_live_regs (&d, &live_regs_mask2);
+ if (TARGET_ALIGN_DOUBLE && d & 1)
+ frame_size += 4;
+
if (frame_pointer_needed)
{
- /* We deliberately make the add dependent on the frame_pointer,
- to ensure that instruction scheduling won't move the stack pointer
- adjust before instructions reading from the frame. This can fail
- if there is an interrupt which then writes to the stack. */
- output_stack_adjust (get_frame_size (), frame_pointer_rtx);
+ output_stack_adjust (frame_size, frame_pointer_rtx, 7);
+
+ /* We must avoid moving the stack pointer adjustment past code
+ which reads from the local frame, else an interrupt could
+ occur after the SP adjustment and clobber data in the local
+ frame. */
+ emit_insn (gen_blockage ());
emit_insn (gen_movsi (stack_pointer_rtx, frame_pointer_rtx));
}
- else
- output_stack_adjust (get_frame_size (), stack_pointer_rtx);
+ else if (frame_size)
+ {
+ /* We must avoid moving the stack pointer adjustment past code
+ which reads from the local frame, else an interrupt could
+ occur after the SP adjustment and clobber data in the local
+ frame. */
+ emit_insn (gen_blockage ());
+ output_stack_adjust (frame_size, stack_pointer_rtx, 7);
+ }
/* Pop all the registers. */
+ if (target_flags != save_flags)
+ emit_insn (gen_toggle_sz ());
+ if (live_regs_mask & (1 << PR_REG))
+ pop (PR_REG);
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
{
int j = (FIRST_PSEUDO_REGISTER - 1) - i;
- if (j < 32 && (live_regs_mask & (1 << j)))
+ if (j < 32 && (live_regs_mask & (1 << j)) && j != PR_REG)
pop (j);
else if (j >= 32 && (live_regs_mask2 & (1 << (j - 32))))
pop (j);
}
+ if (target_flags != save_flags)
+ emit_insn (gen_toggle_sz ());
+ target_flags = save_flags;
output_stack_adjust (extra_push + current_function_pretend_args_size,
- stack_pointer_rtx);
+ stack_pointer_rtx, 7);
+
+ /* Switch back to the normal stack if necessary. */
+ if (sp_switch)
+ emit_insn (gen_sp_switch_2 ());
}
/* Clear variables at function end. */
FILE *stream;
int size;
{
- pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
+ trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
+ sp_switch = NULL_RTX;
}
rtx
-sh_builtin_saveregs (arglist)
- tree arglist;
+sh_builtin_saveregs ()
{
tree fntype = TREE_TYPE (current_function_decl);
/* First unnamed integer register. */
bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
regbuf = assign_stack_local (BLKmode, bufsize, 0);
- MEM_IN_STRUCT_P (regbuf) = 1;
+ MEM_SET_IN_STRUCT_P (regbuf, 1);
/* Save int args.
This is optimized to only save the regs that are necessary. Explicitly
named args need not be saved.
We explicitly build a pointer to the buffer because it halves the insn
count when not optimizing (otherwise the pointer is built for each reg
- saved). */
+ saved).
+ We emit the moves in reverse order so that we can use predecrement. */
fpregs = gen_reg_rtx (Pmode);
emit_move_insn (fpregs, XEXP (regbuf, 0));
- for (regno = first_floatreg; regno < NPARM_REGS (SFmode); regno ++)
- emit_move_insn (gen_rtx (MEM, SFmode,
- plus_constant (fpregs,
- GET_MODE_SIZE (SFmode)
- * (regno - first_floatreg))),
- gen_rtx (REG, SFmode,
- BASE_ARG_REG (SFmode) + regno));
+ emit_insn (gen_addsi3 (fpregs, fpregs,
+ GEN_INT (n_floatregs * UNITS_PER_WORD)));
+ if (TARGET_SH4)
+ {
+ for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs,
+ GEN_INT (-2 * UNITS_PER_WORD)));
+ emit_move_insn (gen_rtx (MEM, DFmode, fpregs),
+ gen_rtx (REG, DFmode, BASE_ARG_REG (DFmode) + regno));
+ }
+ regno = first_floatreg;
+ if (regno & 1)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
+ emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
+ gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno
+ - (TARGET_LITTLE_ENDIAN != 0)));
+ }
+ }
+ else
+ for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
+ {
+ emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (- UNITS_PER_WORD)));
+ emit_move_insn (gen_rtx (MEM, SFmode, fpregs),
+ gen_rtx (REG, SFmode, BASE_ARG_REG (SFmode) + regno));
+ }
/* Return the address of the regbuf. */
return XEXP (regbuf, 0);
int regs_saved;
int total_saved_regs_space;
int total_auto_space = get_frame_size ();
+ int save_flags = target_flags;
- int live_regs_mask2;
- calc_live_regs (®s_saved, &live_regs_mask2);
+ int live_regs_mask, live_regs_mask2;
+ live_regs_mask = calc_live_regs (®s_saved, &live_regs_mask2);
+ if (TARGET_ALIGN_DOUBLE && regs_saved & 1)
+ total_auto_space += 4;
+ target_flags = save_flags;
total_saved_regs_space = (regs_saved) * 4;
if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
return 0;
+ if (from == RETURN_ADDRESS_POINTER_REGNUM
+ && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
+ {
+ int i, n = total_saved_regs_space;
+ for (i = PR_REG-1; i >= 0; i--)
+ if (live_regs_mask & (1 << i))
+ n -= 4;
+ return n + total_auto_space;
+ }
+
abort ();
}
\f
compiler. */
int
-handle_pragma (file, t)
- FILE *file;
- tree t;
+sh_handle_pragma (p_getc, p_ungetc, pname)
+ int (* p_getc) PROTO((void));
+ void (* p_ungetc) PROTO((int));
+ char * pname;
{
int retval = 0;
- register char *pname;
-
- if (TREE_CODE (t) != IDENTIFIER_NODE)
- return 0;
- pname = IDENTIFIER_POINTER (t);
if (strcmp (pname, "interrupt") == 0)
pragma_interrupt = retval = 1;
else if (strcmp (pname, "trapa") == 0)
return retval;
}
+
+/* Generate 'handle_interrupt' attribute for decls */
+
+void
+sh_pragma_insert_attributes (node, attributes, prefix)
+ tree node;
+ tree * attributes;
+ tree * prefix;
+{
+ tree a;
+
+ if (! pragma_interrupt
+ || TREE_CODE (node) != FUNCTION_DECL)
+ return;
+
+ /* We are only interested in fields. */
+ if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
+ return;
+
+ /* Add a 'handle_interrupt' attribute. */
+ * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
+
+ return;
+}
+
+/* Return nonzero if ATTR is a valid attribute for DECL.
+ ATTRIBUTES are any existing attributes and ARGS are the arguments
+ supplied with ATTR.
+
+ Supported attributes:
+
+ interrupt_handler -- specifies this function is an interrupt handler.
+
+ sp_switch -- specifies an alternate stack for an interrupt handler
+ to run on.
+
+ trap_exit -- use a trapa to exit an interrupt function instead of
+ an rte instruction. */
+
+int
+sh_valid_machine_decl_attribute (decl, attributes, attr, args)
+ tree decl;
+ tree attributes;
+ tree attr;
+ tree args;
+{
+ int retval = 0;
+
+ if (TREE_CODE (decl) != FUNCTION_DECL)
+ return 0;
+
+ if (is_attribute_p ("interrupt_handler", attr))
+ {
+ return 1;
+ }
+
+ if (is_attribute_p ("sp_switch", attr))
+ {
+ /* The sp_switch attribute only has meaning for interrupt functions. */
+ if (!pragma_interrupt)
+ return 0;
+
+ /* sp_switch must have an argument. */
+ if (!args || TREE_CODE (args) != TREE_LIST)
+ return 0;
+
+ /* The argument must be a constant string. */
+ if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
+ return 0;
+
+ sp_switch = gen_rtx (SYMBOL_REF, VOIDmode,
+ TREE_STRING_POINTER (TREE_VALUE (args)));
+ return 1;
+ }
+
+ if (is_attribute_p ("trap_exit", attr))
+ {
+ /* The trap_exit attribute only has meaning for interrupt functions. */
+ if (!pragma_interrupt)
+ return 0;
+
+ /* trap_exit must have an argument. */
+ if (!args || TREE_CODE (args) != TREE_LIST)
+ return 0;
+
+ /* The argument must be a constant integer. */
+ if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
+ return 0;
+
+ trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
+ return 1;
+ }
+}
+
\f
/* Predicates used by the templates. */
{
if (register_operand (op, mode))
{
+ int regno;
+
if (GET_CODE (op) == REG)
- return (REGNO (op) != T_REG
- && REGNO (op) != PR_REG
- && REGNO (op) != FPUL_REG
- && REGNO (op) != MACH_REG
- && REGNO (op) != MACL_REG);
- return 1;
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno != T_REG && regno != PR_REG
+ && (regno != FPUL_REG || TARGET_SH4)
+ && regno != MACH_REG && regno != MACL_REG);
+ }
+ return 0;
+}
+
+int
+fp_arith_reg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (register_operand (op, mode))
+ {
+ int regno;
+
+ if (GET_CODE (op) == REG)
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno >= FIRST_PSEUDO_REGISTER
+ || (regno >= FIRST_FP_REG && regno <= LAST_FP_REG));
+ }
+ return 0;
+}
+
+int
+fp_extended_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_CODE (op) == FLOAT_EXTEND && GET_MODE (op) == mode)
+ {
+ op = XEXP (op, 0);
+ mode = GET_MODE (op);
+ }
+ if (register_operand (op, mode))
+ {
+ int regno;
+
+ if (GET_CODE (op) == REG)
+ regno = REGNO (op);
+ else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
+ regno = REGNO (SUBREG_REG (op));
+ else
+ return 1;
+
+ return (regno != T_REG && regno != PR_REG && regno > 15
+ && regno != MACH_REG && regno != MACL_REG);
}
return 0;
}
return 0;
REAL_VALUE_FROM_CONST_DOUBLE (r, op);
- return REAL_VALUES_EQUAL (r, dconst0);
+ return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
}
/* Nonzero if OP is a floating point value with value 1.0. */
REAL_VALUE_FROM_CONST_DOUBLE (r, op);
return REAL_VALUES_EQUAL (r, dconst1);
}
+
+int
+tertiary_reload_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ enum rtx_code code = GET_CODE (op);
+ return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
+}
+
+int
+fpscr_operand (op)
+ rtx op;
+{
+ return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
+ && GET_MODE (op) == PSImode);
+}
+
+int
+commutative_float_operator (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_MODE (op) != mode)
+ return 0;
+ switch (GET_CODE (op))
+ {
+ case PLUS:
+ case MULT:
+ return 1;
+ }
+ return 0;
+}
+
+int
+noncommutative_float_operator (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_MODE (op) != mode)
+ return 0;
+ switch (GET_CODE (op))
+ {
+ case MINUS:
+ case DIV:
+ return 1;
+ }
+ return 0;
+}
+
+int
+binary_float_operator (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (GET_MODE (op) != mode)
+ return 0;
+ switch (GET_CODE (op))
+ {
+ case PLUS:
+ case MINUS:
+ case MULT:
+ case DIV:
+ return 1;
+ }
+ return 0;
+}
+\f
+/* Return the destination address of a branch. */
+
+int
+branch_dest (branch)
+ rtx branch;
+{
+ rtx dest = SET_SRC (PATTERN (branch));
+ int dest_uid;
+
+ if (GET_CODE (dest) == IF_THEN_ELSE)
+ dest = XEXP (dest, 1);
+ dest = XEXP (dest, 0);
+ dest_uid = INSN_UID (dest);
+ return insn_addresses[dest_uid];
+}
\f
/* Return non-zero if REG is not used after INSN.
We assume REG is a reload reg, and therefore does
if (GET_CODE (this_insn) == CALL_INSN)
code = CALL_INSN;
+ else if (GET_CODE (this_insn) == JUMP_INSN)
+ {
+ if (INSN_ANNULLED_BRANCH_P (this_insn))
+ return 0;
+ code = JUMP_INSN;
+ }
if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
return 0;
}
if (retval == 1)
return 1;
+ else if (code == JUMP_INSN)
+ return 0;
}
else if (GET_RTX_CLASS (code) == 'i')
{
}
return 1;
}
+\f
+extern struct obstack permanent_obstack;
+
+rtx
+get_fpscr_rtx ()
+{
+ static rtx fpscr_rtx;
+
+ if (! fpscr_rtx)
+ {
+ push_obstacks (&permanent_obstack, &permanent_obstack);
+ fpscr_rtx = gen_rtx (REG, PSImode, 48);
+ REG_USERVAR_P (fpscr_rtx) = 1;
+ pop_obstacks ();
+ mark_user_reg (fpscr_rtx);
+ }
+ if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
+ mark_user_reg (fpscr_rtx);
+ return fpscr_rtx;
+}
+
+void
+emit_sf_insn (pat)
+ rtx pat;
+{
+ rtx addr;
+ /* When generating reload insns, we must not create new registers. FPSCR
+ should already have the correct value, so do nothing to change it. */
+ if (! TARGET_FPU_SINGLE && ! reload_in_progress)
+ {
+ addr = gen_reg_rtx (SImode);
+ emit_insn (gen_fpu_switch0 (addr));
+ }
+ emit_insn (pat);
+ if (! TARGET_FPU_SINGLE && ! reload_in_progress)
+ {
+ addr = gen_reg_rtx (SImode);
+ emit_insn (gen_fpu_switch1 (addr));
+ }
+}
+
+void
+emit_df_insn (pat)
+ rtx pat;
+{
+ rtx addr;
+ if (TARGET_FPU_SINGLE && ! reload_in_progress)
+ {
+ addr = gen_reg_rtx (SImode);
+ emit_insn (gen_fpu_switch0 (addr));
+ }
+ emit_insn (pat);
+ if (TARGET_FPU_SINGLE && ! reload_in_progress)
+ {
+ addr = gen_reg_rtx (SImode);
+ emit_insn (gen_fpu_switch1 (addr));
+ }
+}
+
+void
+expand_sf_unop (fun, operands)
+ rtx (*fun)();
+ rtx *operands;
+{
+ emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_sf_binop (fun, operands)
+ rtx (*fun)();
+ rtx *operands;
+{
+ emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+}
+
+void
+expand_df_unop (fun, operands)
+ rtx (*fun)();
+ rtx *operands;
+{
+ emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
+}
+
+void
+expand_df_binop (fun, operands)
+ rtx (*fun)();
+ rtx *operands;
+{
+ emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
+ get_fpscr_rtx ()));
+}
+
+void
+expand_fp_branch (compare, branch)
+ rtx (*compare) (), (*branch) ();
+{
+ (GET_MODE (sh_compare_op0) == SFmode ? emit_sf_insn : emit_df_insn)
+ ((*compare) ());
+ emit_jump_insn ((*branch) ());
+}
+\f
+/* We don't want to make fpscr call-saved, because that would prevent
+ channging it, and it would also cost an exstra instruction to save it.
+ We don't want it to be known as a global register either, because
+ that disables all flow analysis. But it has to be live at the function
+ return. Thus, we need to insert a USE at the end of the function. */
+/* This should best be called at about the time FINALIZE_PIC is called,
+ but not dependent on flag_pic. Alas, there is no suitable hook there,
+ so this gets called from HAVE_RETURN. */
+int
+emit_fpscr_use ()
+{
+ static int fpscr_uses = 0;
+
+ if (rtx_equal_function_value_matters)
+ {
+ emit_insn (gen_rtx (USE, VOIDmode, get_fpscr_rtx ()));
+ fpscr_uses++;
+ }
+ else
+ {
+ if (fpscr_uses > 1)
+ {
+ /* Due to he crude way we emit the USEs, we might end up with
+ some extra ones. Delete all but the last one. */
+ rtx insn;
+
+ for (insn = get_last_insn(); insn; insn = PREV_INSN (insn))
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == USE
+ && GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+ && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+ {
+ insn = PREV_INSN (insn);
+ break;
+ }
+ for (; insn; insn = PREV_INSN (insn))
+ if (GET_CODE (insn) == INSN
+ && GET_CODE (PATTERN (insn)) == USE
+ && GET_CODE (XEXP (PATTERN (insn), 0)) == REG
+ && REGNO (XEXP (PATTERN (insn), 0)) == FPSCR_REG)
+ {
+ PUT_CODE (insn, NOTE);
+ NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (insn) = 0;
+ }
+ }
+ fpscr_uses = 0;
+ }
+}
+\f
+/* ??? gcc does flow analysis strictly after common subexpression
+ elimination. As a result, common subespression elimination fails
+ when there are some intervening statements setting the same register.
+ If we did nothing about this, this would hurt the precision switching
+ for SH4 badly. There is some cse after reload, but it is unable to
+ undo the extra register pressure from the unused instructions, and
+ it cannot remove auto-increment loads.
+
+ A C code example that shows this flow/cse weakness for (at least) SH
+ and sparc (as of gcc ss-970706) is this:
+
+double
+f(double a)
+{
+ double d;
+ d = 0.1;
+ a += d;
+ d = 1.1;
+ d = 0.1;
+ a *= d;
+ return a;
+}
+
+ So we add another pass before common subexpression elimination, to
+ remove assignments that are dead due to a following assignment in the
+ same basic block. */
+
+int sh_flag_remove_dead_before_cse;
+
+static void
+mark_use (x, reg_set_block)
+ rtx x, *reg_set_block;
+{
+ enum rtx_code code;
+
+ if (! x)
+ return;
+ code = GET_CODE (x);
+ switch (code)
+ {
+ case REG:
+ {
+ int regno = REGNO (x);
+ int nregs = (regno < FIRST_PSEUDO_REGISTER
+ ? HARD_REGNO_NREGS (regno, GET_MODE (x))
+ : 1);
+ do
+ {
+ reg_set_block[regno + nregs - 1] = 0;
+ }
+ while (--nregs);
+ break;
+ }
+ case SET:
+ {
+ rtx dest = SET_DEST (x);
+
+ if (GET_CODE (dest) == SUBREG)
+ dest = SUBREG_REG (dest);
+ if (GET_CODE (dest) != REG)
+ mark_use (dest, reg_set_block);
+ mark_use (SET_SRC (x), reg_set_block);
+ break;
+ }
+ case CLOBBER:
+ break;
+ default:
+ {
+ char *fmt = GET_RTX_FORMAT (code);
+ int i, j;
+ for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+ {
+ if (fmt[i] == 'e')
+ mark_use (XEXP (x, i), reg_set_block);
+ else if (fmt[i] == 'E')
+ for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+ mark_use (XVECEXP (x, i, j), reg_set_block);
+ }
+ break;
+ }
+ }
+}
+
+int
+remove_dead_before_cse ()
+{
+ rtx *reg_set_block, last, last_call, insn, set;
+ int in_libcall = 0;
+
+ /* This pass should run just once, after rtl generation. */
+
+ if (! sh_flag_remove_dead_before_cse
+ || rtx_equal_function_value_matters
+ || reload_completed)
+ return;
+
+ sh_flag_remove_dead_before_cse = 0;
+
+ reg_set_block = (rtx *)alloca (max_reg_num () * sizeof (rtx));
+ bzero ((char *)reg_set_block, max_reg_num () * sizeof (rtx));
+ last_call = last = get_last_insn ();
+ for (insn = last; insn; insn = PREV_INSN (insn))
+ {
+ if (GET_RTX_CLASS (GET_CODE (insn)) != 'i')
+ continue;
+ if (GET_CODE (insn) == JUMP_INSN)
+ {
+ last_call = last = insn;
+ continue;
+ }
+ set = single_set (insn);
+
+ /* Don't delete parts of libcalls, since that would confuse cse, loop
+ and flow. */
+ if (find_reg_note (insn, REG_RETVAL, NULL_RTX))
+ in_libcall = 1;
+ else if (in_libcall)
+ {
+ if (find_reg_note (insn, REG_LIBCALL, NULL_RTX))
+ in_libcall = 0;
+ }
+ else if (set && GET_CODE (SET_DEST (set)) == REG)
+ {
+ int regno = REGNO (SET_DEST (set));
+ rtx ref_insn = (regno < FIRST_PSEUDO_REGISTER && call_used_regs[regno]
+ ? last_call
+ : last);
+ if (reg_set_block[regno] == ref_insn
+ && (regno >= FIRST_PSEUDO_REGISTER
+ || HARD_REGNO_NREGS (regno, GET_MODE (SET_DEST (set))) == 1)
+ && (GET_CODE (insn) != CALL_INSN || CONST_CALL_P (insn)))
+ {
+ PUT_CODE (insn, NOTE);
+ NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
+ NOTE_SOURCE_FILE (insn) = 0;
+ continue;
+ }
+ else
+ reg_set_block[REGNO (SET_DEST (set))] = ref_insn;
+ }
+ if (GET_CODE (insn) == CALL_INSN)
+ {
+ last_call = insn;
+ mark_use (CALL_INSN_FUNCTION_USAGE (insn), reg_set_block);
+ }
+ mark_use (PATTERN (insn), reg_set_block);
+ }
+ return 0;
+}