/* Subroutines for insn-output.c for Sun SPARC.
- Copyright (C) 1987, 88, 89, 92-97, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
+ 1999, 2000, 2001 Free Software Foundation, Inc.
Contributed by Michael Tiemann (tiemann@cygnus.com)
64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
at Cygnus Support.
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
+#include "function.h"
#include "expr.h"
#include "recog.h"
#include "toplev.h"
+#include "ggc.h"
+#include "tm_p.h"
/* 1 if the caller has placed an "unimp" insn immediately after the call.
This is used in v8 code when calling a function that returns a structure.
static int apparent_fsize;
static int actual_fsize;
+/* Number of live general or floating point registers needed to be saved
+ (as 4-byte quantities). This is only done if TARGET_EPILOGUE. */
+static int num_gfregs;
+
/* Save the operands last given to a compare for use when we
generate a scc or bcc insn. */
88, 89, 90, 91, 92, 93, 94, 95,
96, 97, 98, 99, 100};
+/* Vector, indexed by hard register number, which contains 1
+ for a register that is allowable in a candidate for leaf
+ function treatment. */
+char sparc_leaf_regs[] =
+{ 1, 1, 1, 1, 1, 1, 1, 1,
+ 0, 0, 0, 0, 0, 0, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 0, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1};
+
#endif
/* Name of where we pretend to think the frame pointer points.
this is "%sp+something". We record "something" separately as it may be
too big for reg+constant addressing. */
-static char *frame_base_name;
+static const char *frame_base_name;
static int frame_base_offset;
-static rtx pic_setup_code PROTO((void));
-static void sparc_init_modes PROTO((void));
-static int save_regs PROTO((FILE *, int, int, char *,
+static void sparc_init_modes PARAMS ((void));
+static int save_regs PARAMS ((FILE *, int, int, const char *,
int, int, int));
-static int restore_regs PROTO((FILE *, int, int, char *, int, int));
-static void build_big_number PROTO((FILE *, int, char *));
-static int function_arg_slotno PROTO((const CUMULATIVE_ARGS *,
+static int restore_regs PARAMS ((FILE *, int, int, const char *, int, int));
+static void build_big_number PARAMS ((FILE *, int, const char *));
+static int function_arg_slotno PARAMS ((const CUMULATIVE_ARGS *,
enum machine_mode, tree, int, int,
int *, int *));
-static void sparc_output_addr_vec PROTO((rtx));
-static void sparc_output_addr_diff_vec PROTO((rtx));
-static void sparc_output_deferred_case_vectors PROTO((void));
-
-#ifdef DWARF2_DEBUGGING_INFO
-extern char *dwarf2out_cfi_label ();
-#endif
+static int supersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
+static int hypersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
+static int ultrasparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
+
+static void sparc_output_addr_vec PARAMS ((rtx));
+static void sparc_output_addr_diff_vec PARAMS ((rtx));
+static void sparc_output_deferred_case_vectors PARAMS ((void));
+static void sparc_add_gc_roots PARAMS ((void));
+static void mark_ultrasparc_pipeline_state PARAMS ((void *));
+static int check_return_regs PARAMS ((rtx));
+static int epilogue_renumber PARAMS ((rtx *, int));
+static int ultra_cmove_results_ready_p PARAMS ((rtx));
+static int ultra_fpmode_conflict_exists PARAMS ((enum machine_mode));
+static rtx *ultra_find_type PARAMS ((int, rtx *, int));
+static void ultra_build_types_avail PARAMS ((rtx *, int));
+static void ultra_flush_pipeline PARAMS ((void));
+static void ultra_rescan_pipeline_state PARAMS ((rtx *, int));
+static int set_extends PARAMS ((rtx));
+static void output_restore_regs PARAMS ((FILE *, int));
\f
/* Option handling. */
/* Code model option as passed by user. */
-char *sparc_cmodel_string;
+const char *sparc_cmodel_string;
/* Parsed value. */
enum cmodel sparc_cmodel;
-/* Record alignment options as passed by user. */
-char *sparc_align_loops_string;
-char *sparc_align_jumps_string;
-char *sparc_align_funcs_string;
-
-/* Parsed values, as a power of two. */
-int sparc_align_loops;
-int sparc_align_jumps;
-int sparc_align_funcs;
+char sparc_hard_reg_printed[8];
struct sparc_cpu_select sparc_select[] =
{
sparc_override_options ()
{
static struct code_model {
- char *name;
+ const char *name;
int value;
} cmodels[] = {
{ "32", CM_32 },
/* Map TARGET_CPU_DEFAULT to value for -m{arch,tune}=. */
static struct cpu_default {
int cpu;
- char *name;
+ const char *name;
} cpu_default[] = {
/* There must be one entry here for each TARGET_CPU value. */
{ TARGET_CPU_sparc, "cypress" },
{ TARGET_CPU_sparclet, "tsc701" },
{ TARGET_CPU_sparclite, "f930" },
{ TARGET_CPU_v8, "v8" },
+ { TARGET_CPU_hypersparc, "hypersparc" },
+ { TARGET_CPU_sparclite86x, "sparclite86x" },
{ TARGET_CPU_supersparc, "supersparc" },
{ TARGET_CPU_v9, "v9" },
{ TARGET_CPU_ultrasparc, "ultrasparc" },
struct cpu_default *def;
/* Table of values for -m{cpu,tune}=. */
static struct cpu_table {
- char *name;
+ const char *name;
enum processor_type processor;
int disable;
int enable;
The Fujitsu MB86934 is the recent sparclite chip, with an fpu. */
{ "f930", PROCESSOR_F930, MASK_ISA|MASK_FPU, MASK_SPARCLITE },
{ "f934", PROCESSOR_F934, MASK_ISA, MASK_SPARCLITE|MASK_FPU },
+ { "hypersparc", PROCESSOR_HYPERSPARC, MASK_ISA, MASK_V8|MASK_FPU },
+ { "sparclite86x", PROCESSOR_SPARCLITE86X, MASK_ISA|MASK_FPU,
+ MASK_SPARCLITE },
{ "sparclet", PROCESSOR_SPARCLET, MASK_ISA, MASK_SPARCLET },
/* TEMIC sparclet */
{ "tsc701", PROCESSOR_TSC701, MASK_ISA, MASK_SPARCLET },
{ "v9", PROCESSOR_V9, MASK_ISA, MASK_V9 },
- /* TI ultrasparc */
- { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9 },
+ /* TI ultrasparc I, II, IIi */
+ { "ultrasparc", PROCESSOR_ULTRASPARC, MASK_ISA, MASK_V9
+ /* Although insns using %y are deprecated, it is a clear win on current
+ ultrasparcs. */
+ |MASK_DEPRECATED_V8_INSNS},
{ 0, 0, 0, 0 }
};
struct cpu_table *cpu;
struct sparc_cpu_select *sel;
int fpu;
-
+
#ifndef SPARC_BI_ARCH
/* Check for unsupported architecture size. */
if (! TARGET_64BIT != DEFAULT_ARCH32_P)
+ error ("%s is not supported by this configuration",
+ DEFAULT_ARCH32_P ? "-m64" : "-m32");
+#endif
+
+ /* We force all 64bit archs to use 128 bit long double */
+ if (TARGET_64BIT && ! TARGET_LONG_DOUBLE_128)
{
- error ("%s is not supported by this configuration",
- DEFAULT_ARCH32_P ? "-m64" : "-m32");
+ error ("-mlong-double-64 not allowed with -m64");
+ target_flags |= MASK_LONG_DOUBLE_128;
}
-#endif
/* Code model selection. */
sparc_cmodel = SPARC_DEFAULT_CMODEL;
+
+#ifdef SPARC_BI_ARCH
+ if (TARGET_ARCH32)
+ sparc_cmodel = CM_32;
+#endif
+
if (sparc_cmodel_string != NULL)
{
if (TARGET_ARCH64)
}
/* If -mfpu or -mno-fpu was explicitly used, don't override with
- the processor default. */
+ the processor default. Clear MASK_FPU_SET to avoid confusing
+ the reverse mapping from switch values to names. */
if (TARGET_FPU_SET)
- target_flags = (target_flags & ~MASK_FPU) | fpu;
+ {
+ target_flags = (target_flags & ~MASK_FPU) | fpu;
+ target_flags &= ~MASK_FPU_SET;
+ }
+
+ /* Don't allow -mvis if FPU is disabled. */
+ if (! TARGET_FPU)
+ target_flags &= ~MASK_VIS;
+
+ /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
+ are available.
+ -m64 also implies v9. */
+ if (TARGET_VIS || TARGET_ARCH64)
+ target_flags |= MASK_V9;
/* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
if (TARGET_V9 && TARGET_ARCH32)
target_flags |= MASK_DEPRECATED_V8_INSNS;
- /* V8PLUS requires V9 */
- if (! TARGET_V9)
+ /* V8PLUS requires V9, makes no sense in 64 bit mode. */
+ if (! TARGET_V9 || TARGET_ARCH64)
target_flags &= ~MASK_V8PLUS;
/* Don't use stack biasing in 32 bit mode. */
if (TARGET_ARCH32)
target_flags &= ~MASK_STACK_BIAS;
- /* Don't allow -mvis if FPU is disabled. */
- if (! TARGET_FPU)
- target_flags &= ~MASK_VIS;
-
- /* Validate -malign-loops= value, or provide default. */
- if (sparc_align_loops_string)
- {
- sparc_align_loops = exact_log2 (atoi (sparc_align_loops_string));
- if (sparc_align_loops < 2 || sparc_align_loops > 7)
- fatal ("-malign-loops=%s is not between 4 and 128 or is not a power of two",
- sparc_align_loops_string);
- }
- else
- {
- /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
- its 0. This sounds a bit kludgey. */
- sparc_align_loops = 0;
- }
-
- /* Validate -malign-jumps= value, or provide default. */
- if (sparc_align_jumps_string)
- {
- sparc_align_jumps = exact_log2 (atoi (sparc_align_jumps_string));
- if (sparc_align_jumps < 2 || sparc_align_loops > 7)
- fatal ("-malign-jumps=%s is not between 4 and 128 or is not a power of two",
- sparc_align_jumps_string);
- }
- else
- {
- /* ??? This relies on ASM_OUTPUT_ALIGN to not emit the alignment if
- its 0. This sounds a bit kludgey. */
- sparc_align_jumps = 0;
- }
-
- /* Validate -malign-functions= value, or provide default. */
- if (sparc_align_funcs_string)
- {
- sparc_align_funcs = exact_log2 (atoi (sparc_align_funcs_string));
- if (sparc_align_funcs < 2 || sparc_align_loops > 7)
- fatal ("-malign-functions=%s is not between 4 and 128 or is not a power of two",
- sparc_align_funcs_string);
- }
- else
- sparc_align_funcs = DEFAULT_SPARC_ALIGN_FUNCS;
+ /* Supply a default value for align_functions. */
+ if (align_functions == 0 && sparc_cpu == PROCESSOR_ULTRASPARC)
+ align_functions = 32;
/* Validate PCC_STRUCT_RETURN. */
if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
/* Do various machine dependent initializations. */
sparc_init_modes ();
+
+ if ((profile_flag || profile_block_flag)
+ && sparc_cmodel != CM_32 && sparc_cmodel != CM_MEDLOW)
+ {
+ error ("profiling does not support code models other than medlow");
+ }
+
+ /* Register global variables with the garbage collector. */
+ sparc_add_gc_roots ();
}
\f
/* Miscellaneous utilities. */
/* Operand constraints. */
/* Return non-zero only if OP is a register of mode MODE,
- or const0_rtx. Don't allow const0_rtx if TARGET_LIVE_G0 because
- %g0 may contain anything. */
+ or const0_rtx. */
int
reg_or_0_operand (op, mode)
{
if (register_operand (op, mode))
return 1;
- if (TARGET_LIVE_G0)
- return 0;
if (op == const0_rtx)
return 1;
if (GET_MODE (op) == VOIDmode && GET_CODE (op) == CONST_DOUBLE
&& CONST_DOUBLE_HIGH (op) == 0
&& CONST_DOUBLE_LOW (op) == 0)
return 1;
- if (GET_MODE_CLASS (GET_MODE (op)) == MODE_FLOAT
- && GET_CODE (op) == CONST_DOUBLE
- && fp_zero_operand (op))
+ if (fp_zero_operand (op, mode))
return 1;
return 0;
}
/* Nonzero if OP is a floating point value with value 0.0. */
int
-fp_zero_operand (op)
+fp_zero_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ /* Only floating-point constants qualify; integer zeros are
+ checked separately by callers such as reg_or_0_operand. */
+ if (GET_MODE_CLASS (GET_MODE (op)) != MODE_FLOAT)
+ return 0;
+ /* CONST0_RTX yields the shared canonical 0.0 rtx for MODE, so
+ pointer equality is a sufficient test here. */
+ return op == CONST0_RTX (mode);
+}
+
+/* Nonzero if OP is a floating point constant which can
+ be loaded into an integer register using a single
+ sethi instruction. */
+
+int
+fp_sethi_p (op)
+ rtx op;
+{
+ if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ /* Exact +0.0 is rejected -- its all-zero image is presumably
+ better materialized from %g0 by other patterns; -0.0 falls
+ through because REAL_VALUE_MINUS_ZERO holds for it. */
+ if (REAL_VALUES_EQUAL (r, dconst0) &&
+ ! REAL_VALUE_MINUS_ZERO (r))
+ return 0;
+ /* I now holds the 32-bit SFmode bit image of the value; a
+ single sethi suffices when SPARC_SETHI_P accepts it. */
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ if (SPARC_SETHI_P (i))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Nonzero if OP is a floating point constant which can
+ be loaded into an integer register using a single
+ mov instruction. */
+
+int
+fp_mov_p (op)
+ rtx op;
+{
+ if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ /* Plain +0.0 is excluded (handled elsewhere, presumably via
+ %g0); -0.0 has a nonzero bit image and falls through. */
+ if (REAL_VALUES_EQUAL (r, dconst0) &&
+ ! REAL_VALUE_MINUS_ZERO (r))
+ return 0;
+ /* A single mov-style insn works when the SFmode bit image
+ fits the signed 13-bit immediate field (SPARC_SIMM13_P). */
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ if (SPARC_SIMM13_P (i))
+ return 1;
+ }
+
+ return 0;
+}
+
+/* Nonzero if OP is a floating point constant which can
+ be loaded into an integer register using a high/losum
+ instruction sequence. */
+
+int
+fp_high_losum_p (op)
 rtx op;
{
- REAL_VALUE_TYPE r;
+ /* The constraints calling this should only be in
+ SFmode move insns, so any constant which cannot
+ be moved using a single insn will do. */
+ if (GET_CODE (op) == CONST_DOUBLE)
+ {
+ REAL_VALUE_TYPE r;
+ long i;
+
+ REAL_VALUE_FROM_CONST_DOUBLE (r, op);
+ /* +0.0 is excluded, as in fp_sethi_p/fp_mov_p above. */
+ if (REAL_VALUES_EQUAL (r, dconst0) &&
+ ! REAL_VALUE_MINUS_ZERO (r))
+ return 0;
+ REAL_VALUE_TO_TARGET_SINGLE (r, i);
+ /* Needs the two-insn high/losum sequence exactly when neither
+ single-insn form (sethi or simm13 mov) applies. */
+ if (! SPARC_SETHI_P (i)
+ && ! SPARC_SIMM13_P (i))
+ return 1;
+ }
- REAL_VALUE_FROM_CONST_DOUBLE (r, op);
- return (REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r));
+ return 0;
}
/* Nonzero if OP is an integer register. */
register rtx op;
enum machine_mode mode;
{
+ enum machine_mode omode = GET_MODE (op);
+
+ if (omode != mode && omode != VOIDmode && mode != VOIDmode)
+ return 0;
+
switch (GET_CODE (op))
{
case SYMBOL_REF:
|| GET_CODE (XEXP (op, 0)) == LABEL_REF)
&& GET_CODE (XEXP (op, 1)) == CONST_INT);
- /* ??? This clause seems to be irrelevant. */
- case CONST_DOUBLE:
- return GET_MODE (op) == mode;
-
default:
return 0;
}
/* Assume canonical format of symbol + constant.
Fall through. */
case CONST :
- return data_segment_operand (XEXP (op, 0));
+ return data_segment_operand (XEXP (op, 0), VOIDmode);
default :
return 0;
}
/* Assume canonical format of symbol + constant.
Fall through. */
case CONST :
- return text_segment_operand (XEXP (op, 0));
+ return text_segment_operand (XEXP (op, 0), VOIDmode);
default :
return 0;
}
enum machine_mode mode;
{
int val;
- if (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX)
+ if (register_operand (op, mode))
return 1;
if (GET_CODE (op) != CONST_INT)
return 0;
((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
(HOST_WIDE_INT)0xffffffff : 0)))
#endif
- || GET_CODE (op) == CONSTANT_P_RTX);
+ );
}
/* The same, but only for sethi instructions. */
|| (GET_CODE (op) == CONST_DOUBLE
&& CONST_DOUBLE_HIGH (op) == 0
&& (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
- && SPARC_SETHI_P (CONST_DOUBLE_LOW (op)))
- || GET_CODE (op) == CONSTANT_P_RTX);
+ && SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
}
/* Return true if OP is a register, or is a CONST_INT that can fit in a
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_INT && SPARC_SIMM11_P (INTVAL (op))));
}
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_INT && SPARC_SIMM10_P (INTVAL (op))));
}
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_INT && SMALL_INT (op))
|| (! TARGET_ARCH64
&& GET_CODE (op) == CONST_DOUBLE
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_DOUBLE
&& (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
&& (unsigned HOST_WIDE_INT) (CONST_DOUBLE_LOW (op) + 0x400) < 0x800
enum machine_mode mode;
{
return (register_operand (op, mode)
- || GET_CODE (op) == CONSTANT_P_RTX
|| (GET_CODE (op) == CONST_DOUBLE
&& (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
&& (unsigned) (CONST_DOUBLE_LOW (op) + 0x200) < 0x400
rtx op;
enum machine_mode mode ATTRIBUTE_UNUSED;
{
- return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
- || GET_CODE (op) == CONSTANT_P_RTX);
+ return (GET_CODE (op) == CONST_INT && SMALL_INT (op));
}
int
return ((GET_CODE (op) == CONST_INT && SMALL_INT (op))
|| (GET_CODE (op) == CONST_DOUBLE
&& CONST_DOUBLE_HIGH (op) == 0
- && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op)))
- || GET_CODE (op) == CONSTANT_P_RTX);
+ && SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))));
}
/* Recognize operand values for the umul instruction. That instruction sign
{
#if HOST_BITS_PER_WIDE_INT > 32
/* All allowed constants will fit a CONST_INT. */
- return ((GET_CODE (op) == CONST_INT
- && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
- || (INTVAL (op) >= 0xFFFFF000
- && INTVAL (op) < 0x100000000)))
- || GET_CODE (op) == CONSTANT_P_RTX);
+ return (GET_CODE (op) == CONST_INT
+ && ((INTVAL (op) >= 0 && INTVAL (op) < 0x1000)
+ || (INTVAL (op) >= 0xFFFFF000
+ && INTVAL (op) < 0x100000000)));
#else
- return (((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
- || (GET_CODE (op) == CONST_DOUBLE
- && CONST_DOUBLE_HIGH (op) == 0
- && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000))
- || GET_CODE (op) == CONSTANT_P_RTX);
+ return ((GET_CODE (op) == CONST_INT && (unsigned) INTVAL (op) < 0x1000)
+ || (GET_CODE (op) == CONST_DOUBLE
+ && CONST_DOUBLE_HIGH (op) == 0
+ && (unsigned) CONST_DOUBLE_LOW (op) - 0xFFFFF000 < 0x1000));
#endif
}
return (GET_CODE (op) == REG && call_used_regs[REGNO (op)]);
}
-/* Return 1 if OP is const0_rtx, used for TARGET_LIVE_G0 insns. */
-
-int
-zero_operand (op, mode)
- rtx op;
- enum machine_mode mode ATTRIBUTE_UNUSED;
-{
- return (op == const0_rtx || GET_CODE (op) == CONSTANT_P_RTX);
-}
-
/* Return 1 if OP is a valid operand for the source of a move insn. */
int
if (mode != VOIDmode && GET_MODE (op) != VOIDmode && mode != GET_MODE (op))
return 0;
+ /* Only a tiny bit of handling for CONSTANT_P_RTX is necessary. */
+ if (GET_CODE (op) == CONST && GET_CODE (XEXP (op, 0)) == CONSTANT_P_RTX)
+ return 1;
+
/* Allow any one instruction integer constant, and all CONST_INT
variants when we are working in DImode and !arch64. */
if (GET_MODE_CLASS (mode) == MODE_INT
&& ((SPARC_SETHI_P (INTVAL (op))
&& (! TARGET_ARCH64
|| (INTVAL (op) >= 0)
- || mode == SImode))
+ || mode == SImode
+ || mode == HImode
+ || mode == QImode))
|| SPARC_SIMM13_P (INTVAL (op))
|| (mode == DImode
&& ! TARGET_ARCH64)))
(SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
&& (((CONST_DOUBLE_LOW (op) & 0x80000000) == 0
&& CONST_DOUBLE_HIGH (op) == 0)
- || (CONST_DOUBLE_HIGH (op) == -1)))
+ || (CONST_DOUBLE_HIGH (op) == -1
+ && CONST_DOUBLE_LOW (op) & 0x80000000) != 0))
#endif
))))
return 1;
- /* Always match this. */
- if (GET_CODE (op) == CONSTANT_P_RTX)
- return 1;
-
/* If !arch64 and this is a DImode const, allow it so that
the splits can be generated. */
if (! TARGET_ARCH64
if (register_operand (op, mode))
return 1;
+ if (GET_MODE_CLASS (mode) == MODE_FLOAT
+ && GET_CODE (op) == CONST_DOUBLE)
+ return 1;
+
/* If this is a SUBREG, look inside so that we handle
paradoxical ones. */
if (GET_CODE (op) == SUBREG)
or %temp4, %temp5, %reg */
/* Getting this right wrt. reloading is really tricky.
- We _MUST_ have a seperate temporary at this point,
+ We _MUST_ have a separate temporary at this point,
if we don't barf immediately instead of generating
incorrect code. */
if (temp1 == op0)
else
{
/* Getting this right wrt. reloading is really tricky.
- We _MUST_ have a seperate temporary at this point,
+ We _MUST_ have a separate temporary at this point,
so we barf immediately instead of generating
incorrect code. */
if (temp1 == op0)
/* These avoid problems when cross compiling. If we do not
go through all this hair then the optimizer will see
invalid REG_EQUAL notes or in some cases none at all. */
-static void sparc_emit_set_safe_HIGH64 PROTO ((rtx, HOST_WIDE_INT));
-static rtx gen_safe_SET64 PROTO ((rtx, HOST_WIDE_INT));
-static rtx gen_safe_OR64 PROTO ((rtx, HOST_WIDE_INT));
-static rtx gen_safe_XOR64 PROTO ((rtx, HOST_WIDE_INT));
+static void sparc_emit_set_safe_HIGH64 PARAMS ((rtx, HOST_WIDE_INT));
+static rtx gen_safe_SET64 PARAMS ((rtx, HOST_WIDE_INT));
+static rtx gen_safe_OR64 PARAMS ((rtx, HOST_WIDE_INT));
+static rtx gen_safe_XOR64 PARAMS ((rtx, HOST_WIDE_INT));
#if HOST_BITS_PER_WIDE_INT == 64
#define GEN_HIGHINT64(__x) GEN_INT ((__x) & 0xfffffc00)
opportunities. */
static void sparc_emit_set_const64_quick1
- PROTO((rtx, rtx, unsigned HOST_WIDE_INT, int));
+ PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT, int));
static void
sparc_emit_set_const64_quick1 (op0, temp, low_bits, is_neg)
}
static void sparc_emit_set_const64_quick2
- PROTO((rtx, rtx, unsigned HOST_WIDE_INT,
+ PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT, int));
static void
}
static void sparc_emit_set_const64_longway
- PROTO((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
+ PARAMS ((rtx, rtx, unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
/* Full 64-bit constant decomposition. Even though this is the
'worst' case, we still optimize a few things away. */
/* Analyze a 64-bit constant for certain properties. */
static void analyze_64bit_constant
- PROTO((unsigned HOST_WIDE_INT,
+ PARAMS ((unsigned HOST_WIDE_INT,
unsigned HOST_WIDE_INT,
int *, int *, int *));
}
static int const64_is_2insns
- PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
+ PARAMS ((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT));
static int
const64_is_2insns (high_bits, low_bits)
}
static unsigned HOST_WIDE_INT create_simple_focus_bits
- PROTO((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
+ PARAMS ((unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
int, int));
static unsigned HOST_WIDE_INT
rtx temp;
/* Sanity check that we know what we are working with. */
- if (! TARGET_ARCH64
- || GET_CODE (op0) != REG
- || (REGNO (op0) >= SPARC_FIRST_FP_REG
- && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
+ if (! TARGET_ARCH64)
abort ();
+ if (GET_CODE (op0) != SUBREG)
+ {
+ if (GET_CODE (op0) != REG
+ || (REGNO (op0) >= SPARC_FIRST_FP_REG
+ && REGNO (op0) <= SPARC_LAST_V9_FP_REG))
+ abort ();
+ }
+
if (reload_in_progress || reload_completed)
temp = op0;
else
sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
}
+/* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
+ return the mode to be used for the comparison. For floating-point,
+ CCFP[E]mode is used. CC_NOOVmode should be used when the first operand
+ is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
+ processing is needed. */
+
+enum machine_mode
+select_cc_mode (op, x, y)
+ enum rtx_code op;
+ rtx x;
+ rtx y ATTRIBUTE_UNUSED;
+{
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
+ {
+ switch (op)
+ {
+ /* Comparisons that must tolerate unordered (NaN) operands
+ use CCFPmode. NOTE(review): presumably emitted as the
+ quiet fcmp -- confirm against the patterns in sparc.md. */
+ case EQ:
+ case NE:
+ case UNORDERED:
+ case ORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case UNEQ:
+ case LTGT:
+ return CCFPmode;
+
+ /* Ordered inequalities get CCFPEmode (the E-suffixed,
+ presumably signaling, compare). */
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ return CCFPEmode;
+
+ default:
+ abort ();
+ }
+ }
+ else if (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
+ || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
+ {
+ /* Flag-setting arithmetic: use the no-overflow (_NOOV)
+ variants, with the CCX forms for 64-bit DImode results. */
+ if (TARGET_ARCH64 && GET_MODE (x) == DImode)
+ return CCX_NOOVmode;
+ else
+ return CC_NOOVmode;
+ }
+ else
+ {
+ /* Plain compare: CCXmode for 64-bit operands, CCmode else. */
+ if (TARGET_ARCH64 && GET_MODE (x) == DImode)
+ return CCXmode;
+ else
+ return CCmode;
+ }
+}
+
/* X and Y are two things to compare using CODE. Emit the compare insn and
return the rtx for the cc reg in the proper mode. */
gen_rtx_LABEL_REF (VOIDmode, label),
pc_rtx)));
}
+
+/* Generate a DFmode part of a hard TFmode register.
+ REG is the TFmode hard register; LOW is 1 to select the
+ low 64 bits of REG, 0 to select the high 64 bits.
+ */
+rtx
+gen_df_reg (reg, low)
+ rtx reg;
+ int low;
+{
+ int regno = REGNO (reg);
+
+ /* Step to the other half when the requested half is not the first
+ in word order. NOTE(review): the +1 step for regno < 32 under
+ ARCH64 presumably reflects integer-register numbering vs. the
+ +2 step for FP registers -- confirm against sparc.h. */
+ if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
+ regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
+ return gen_rtx_REG (DFmode, regno);
+}
\f
/* Return nonzero if a return peephole merging return with
setting of output register is ok. */
if (get_attr_length (trial) != 1)
return 0;
- /* If %g0 is live, there are lots of things we can't handle.
- Rather than trying to find them all now, let's punt and only
- optimize things as necessary. */
- if (TARGET_LIVE_G0)
+ /* If there are any call-saved registers, we should scan TRIAL if it
+ does not reference them. For now just make it easy. */
+ if (num_gfregs)
return 0;
/* In the case of a true leaf function, anything can go into the delay slot.
A delay slot only exists however if the frame size is zero, otherwise
we will put an insn to adjust the stack after the return. */
- if (leaf_function)
+ if (current_function_uses_only_leaf_regs)
{
if (leaf_return_peephole_ok ())
return ((get_attr_in_uncond_branch_delay (trial)
return 0;
}
- /* If only trivial `restore' insns work, nothing can go in the
- delay slot. */
- else if (TARGET_BROKEN_SAVERESTORE)
- return 0;
-
pat = PATTERN (trial);
/* Otherwise, only operations which can be done in tandem with
- a `restore' insn can go into the delay slot. */
+ a `restore' or `return' insn can go into the delay slot. */
if (GET_CODE (SET_DEST (pat)) != REG
- || REGNO (SET_DEST (pat)) >= 32
|| REGNO (SET_DEST (pat)) < 24)
return 0;
+ /* If this instruction sets up floating point register and we have a return
+ instruction, it can probably go in. But restore will not work
+ with FP_REGS. */
+ if (REGNO (SET_DEST (pat)) >= 32)
+ {
+ if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
+ && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
+ return 1;
+ return 0;
+ }
+
/* The set of insns matched here must agree precisely with the set of
patterns paired with a RETURN in sparc.md. */
src = SET_SRC (pat);
- /* This matches "*return_[qhs]i". */
+ /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64. */
if (arith_operand (src, GET_MODE (src)))
- return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
-
+ {
+ if (TARGET_ARCH64)
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ else
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
+ }
+
/* This matches "*return_di". */
else if (arith_double_operand (src, GET_MODE (src)))
return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
&& register_operand (src, SFmode))
return 1;
+ /* If we have return instruction, anything that does not use
+ local or output registers and can go into a delay slot wins. */
+ else if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
+ && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
+ return 1;
+
/* This matches "*return_addsi". */
else if (GET_CODE (src) == PLUS
&& arith_operand (XEXP (src, 0), SImode)
|| register_operand (XEXP (src, 1), DImode)))
return 1;
+ /* This can match "*return_losum_[sd]i".
+ Catch only some cases, so that return_losum* don't have
+ to be too big. */
+ else if (GET_CODE (src) == LO_SUM
+ && ! TARGET_CM_MEDMID
+ && ((register_operand (XEXP (src, 0), SImode)
+ && immediate_operand (XEXP (src, 1), SImode))
+ || (TARGET_ARCH64
+ && register_operand (XEXP (src, 0), DImode)
+ && immediate_operand (XEXP (src, 1), DImode))))
+ return 1;
+
+ /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well. */
+ else if (GET_CODE (src) == ASHIFT
+ && (register_operand (XEXP (src, 0), SImode)
+ || register_operand (XEXP (src, 0), DImode))
+ && XEXP (src, 1) == const1_rtx)
+ return 1;
+
+ return 0;
+}
+
+/* Return nonzero if TRIAL can go into the sibling call
+ delay slot. */
+
+int
+eligible_for_sibcall_delay (trial)
+ rtx trial;
+{
+ rtx pat, src;
+
+ /* Only a plain single-SET insn can be considered. */
+ if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
+ return 0;
+
+ /* Must be a single-word insn; profile_block_flag == 2 also
+ disqualifies -- TODO(review): confirm the profiling rationale. */
+ if (get_attr_length (trial) != 1 || profile_block_flag == 2)
+ return 0;
+
+ pat = PATTERN (trial);
+
+ if (current_function_uses_only_leaf_regs)
+ {
+ /* If the tail call is done using the call instruction,
+ we have to restore %o7 in the delay slot. */
+ if ((TARGET_ARCH64 && ! TARGET_CM_MEDLOW) || flag_pic)
+ return 0;
+
+ /* %g1 is used to build the function address */
+ if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
+ return 0;
+
+ return 1;
+ }
+
+ /* Otherwise, only operations which can be done in tandem with
+ a `restore' insn can go into the delay slot. */
+ if (GET_CODE (SET_DEST (pat)) != REG
+ || REGNO (SET_DEST (pat)) < 24
+ || REGNO (SET_DEST (pat)) >= 32)
+ return 0;
+
+ /* If it mentions %o7, it can't go in, because sibcall will clobber it
+ in most cases. */
+ if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
+ return 0;
+
+ src = SET_SRC (pat);
+
+ /* The move forms below parallel those accepted for the return
+ delay slot; NOTE(review): they must stay in step with the
+ restore-paired patterns in sparc.md. */
+ if (arith_operand (src, GET_MODE (src)))
+ {
+ if (TARGET_ARCH64)
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ else
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
+ }
+
+ else if (arith_double_operand (src, GET_MODE (src)))
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+
+ else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
+ && register_operand (src, SFmode))
+ return 1;
+
+ else if (GET_CODE (src) == PLUS
+ && arith_operand (XEXP (src, 0), SImode)
+ && arith_operand (XEXP (src, 1), SImode)
+ && (register_operand (XEXP (src, 0), SImode)
+ || register_operand (XEXP (src, 1), SImode)))
+ return 1;
+
+ else if (GET_CODE (src) == PLUS
+ && arith_double_operand (XEXP (src, 0), DImode)
+ && arith_double_operand (XEXP (src, 1), DImode)
+ && (register_operand (XEXP (src, 0), DImode)
+ || register_operand (XEXP (src, 1), DImode)))
+ return 1;
+
+ /* LO_SUM addresses; excluded for MEDMID, where such a constant
+ presumably needs more than one insn -- TODO(review): confirm. */
+ else if (GET_CODE (src) == LO_SUM
+ && ! TARGET_CM_MEDMID
+ && ((register_operand (XEXP (src, 0), SImode)
+ && immediate_operand (XEXP (src, 1), SImode))
+ || (TARGET_ARCH64
+ && register_operand (XEXP (src, 0), DImode)
+ && immediate_operand (XEXP (src, 1), DImode))))
+ return 1;
+
+ /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well. */
+ else if (GET_CODE (src) == ASHIFT
+ && (register_operand (XEXP (src, 0), SImode)
+ || register_operand (XEXP (src, 0), DImode))
+ && XEXP (src, 1) == const1_rtx)
+ return 1;
+
+ return 0;
+}
short_branch (uid1, uid2)
int uid1, uid2;
{
- unsigned int delta = insn_addresses[uid1] - insn_addresses[uid2];
- if (delta + 1024 < 2048)
+ int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
+
+ /* Leave a few words of "slop". */
+ if (delta >= -1023 && delta <= 1022)
return 1;
- /* warning ("long branch, distance %d", delta); */
+
return 0;
}
switch (flag_pic)
{
case 1:
- if (GET_CODE (recog_operand[i]) == SYMBOL_REF
- || (GET_CODE (recog_operand[i]) == CONST
- && ! (GET_CODE (XEXP (recog_operand[i], 0)) == MINUS
- && (XEXP (XEXP (recog_operand[i], 0), 0)
+ if (GET_CODE (recog_data.operand[i]) == SYMBOL_REF
+ || (GET_CODE (recog_data.operand[i]) == CONST
+ && ! (GET_CODE (XEXP (recog_data.operand[i], 0)) == MINUS
+ && (XEXP (XEXP (recog_data.operand[i], 0), 0)
== global_offset_table)
- && (GET_CODE (XEXP (XEXP (recog_operand[i], 0), 1))
+ && (GET_CODE (XEXP (XEXP (recog_data.operand[i], 0), 1))
== CONST))))
abort ();
case 2:
return orig;
}
-/* Return the RTX for insns to set the PIC register. */
-
-static rtx
-pic_setup_code ()
-{
- rtx seq;
-
- start_sequence ();
- emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
- get_pc_symbol));
- seq = gen_sequence ();
- end_sequence ();
-
- return seq;
-}
-
-/* Emit special PIC prologues and epilogues. */
+/* Emit special PIC prologues. */
void
-finalize_pic ()
+load_pic_register ()
{
/* Labels to get the PC in the prologue of this function. */
int orig_flag_pic = flag_pic;
- rtx insn;
-
- if (current_function_uses_pic_offset_table == 0)
- return;
if (! flag_pic)
abort ();
if (align > 0)
ASM_OUTPUT_ALIGN (asm_out_file, align);
ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, "LGETPC", 0);
- fputs ("\tretl\n\tadd %o7,%l7,%l7\n", asm_out_file);
+ fputs ("\tretl\n\tadd\t%o7, %l7, %l7\n", asm_out_file);
}
/* Initialize every time through, since we can't easily
get_pc_symbol = gen_rtx_SYMBOL_REF (Pmode, get_pc_symbol_name);
flag_pic = 0;
- emit_insn_after (pic_setup_code (), get_insns ());
-
- /* Insert the code in each nonlocal goto receiver.
- If you make changes here or to the nonlocal_goto_receiver
- pattern, make sure the unspec_volatile numbers still
- match. */
- for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
- if (GET_CODE (insn) == INSN && GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
- && XINT (PATTERN (insn), 1) == 5)
- emit_insn_after (pic_setup_code (), insn);
+ emit_insn (gen_get_pc (pic_offset_table_rtx, global_offset_table,
+ get_pc_symbol));
flag_pic = orig_flag_pic;
{
/* Check if the compiler has recorded some information
about the alignment of the base REG. If reload has
- completed, we already matched with proper alignments. */
- if (((regno_pointer_align != NULL
- && REGNO_POINTER_ALIGN (regno) >= desired)
- || reload_completed)
- && ((INTVAL (offset) & (desired - 1)) == 0))
+ completed, we already matched with proper alignments.
+ If not running global_alloc, reload might give us
+ unaligned pointer to local stack though. */
+ if (((cfun != 0
+ && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
+ || (optimize && reload_completed))
+ && (INTVAL (offset) & (desired - 1)) == 0)
return 1;
}
else
/* Modes for quad-word and smaller quantities. */
#define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
+/* Modes for 8-word and smaller quantities. */
+#define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
+
/* Modes for single-float quantities. We must allow any single word or
smaller quantity. This is because the fix/float conversion instructions
take integer inputs/outputs from the float registers. */
/* Modes for double-float and smaller quantities. */
#define DF_MODES (S_MODES | D_MODES)
-#define DF_MODES64 DF_MODES
-
/* Modes for double-float only quantities. */
-#define DF_ONLY_MODES ((1 << (int) DF_MODE) | (1 << (int) D_MODE))
-
-/* Modes for double-float and larger quantities. */
-#define DF_UP_MODES (DF_ONLY_MODES | TF_ONLY_MODES)
+#define DF_MODES_NO_S (D_MODES)
/* Modes for quad-float only quantities. */
#define TF_ONLY_MODES (1 << (int) TF_MODE)
/* Modes for quad-float and smaller quantities. */
#define TF_MODES (DF_MODES | TF_ONLY_MODES)
-#define TF_MODES64 (DF_MODES64 | TF_ONLY_MODES)
+/* Modes for quad-float and double-float quantities. */
+#define TF_MODES_NO_S (DF_MODES_NO_S | TF_ONLY_MODES)
+
+/* Modes for quad-float pair only quantities. */
+#define OF_ONLY_MODES (1 << (int) OF_MODE)
+
+/* Modes for quad-float pairs and smaller quantities. */
+#define OF_MODES (TF_MODES | OF_ONLY_MODES)
+
+#define OF_MODES_NO_S (TF_MODES_NO_S | OF_ONLY_MODES)
/* Modes for condition codes. */
#define CC_MODES (1 << (int) CC_MODE)
T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
- TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
- TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
- TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
- TF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
/* FP regs f32 to f63. Only the even numbered registers actually exist,
and none can hold SFmode/SImode values. */
- DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
- DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
- DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
- DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
/* %fcc[0123] */
CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
static int hard_64bit_mode_classes[] = {
D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
+ O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
- T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
- T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
+ O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
- TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
- TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
- TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
- TF_MODES64, SF_MODES, DF_MODES64, SF_MODES, TF_MODES64, SF_MODES, DF_MODES64, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
+ OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
/* FP regs f32 to f63. Only the even numbered registers actually exist,
and none can hold SFmode/SImode values. */
- DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
- DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
- DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
- DF_UP_MODES, 0, DF_ONLY_MODES, 0, DF_UP_MODES, 0, DF_ONLY_MODES, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
+ OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
/* %fcc[0123] */
CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
save_regs (file, low, high, base, offset, n_regs, real_offset)
FILE *file;
int low, high;
- char *base;
+ const char *base;
int offset;
int n_regs;
int real_offset;
restore_regs (file, low, high, base, offset, n_regs)
FILE *file;
int low, high;
- char *base;
+ const char *base;
int offset;
int n_regs;
{
base, offset + 4 * n_regs, reg_names[i]),
n_regs += 2;
else
- fprintf (file, "\tld\t[%s+%d],%s\n",
+ fprintf (file, "\tld\t[%s+%d], %s\n",
base, offset + 4 * n_regs, reg_names[i]),
n_regs += 2;
else if (regs_ever_live[i+1] && ! call_used_regs[i+1])
- fprintf (file, "\tld\t[%s+%d],%s\n",
+ fprintf (file, "\tld\t[%s+%d], %s\n",
base, offset + 4 * n_regs + 4, reg_names[i+1]),
n_regs += 2;
}
return n_regs;
}
-/* Static variables we want to share between prologue and epilogue. */
-
-/* Number of live general or floating point registers needed to be saved
- (as 4-byte quantities). This is only done if TARGET_EPILOGUE. */
-static int num_gfregs;
-
-/* Compute the frame size required by the function. This function is called
- during the reload pass and also by output_function_prologue(). */
+/* Compute the frame size required by the function. This function is called
+ during the reload pass and also by output_function_prologue(). */
int
compute_frame_size (size, leaf_function)
build_big_number (file, num, reg)
FILE *file;
int num;
- char *reg;
+ const char *reg;
{
if (num >= 0 || ! TARGET_ARCH64)
{
}
}
+/* Output any necessary .register pseudo-ops. */
+void
+sparc_output_scratch_registers (file)
+ FILE *file ATTRIBUTE_UNUSED;
+{
+#ifdef HAVE_AS_REGISTER_PSEUDO_OP
+ int i;
+
+ if (TARGET_ARCH32)
+ return;
+
+ /* Check if %g[2367] were used without
+ .register being printed for them already. */
+ for (i = 2; i < 8; i++)
+ {
+ if (regs_ever_live [i]
+ && ! sparc_hard_reg_printed [i])
+ {
+ sparc_hard_reg_printed [i] = 1;
+ fprintf (file, "\t.register\t%%g%d, #scratch\n", i);
+ }
+ if (i == 3) i = 5;
+ }
+#endif
+}
+
/* Output code for the function prologue. */
void
int size;
int leaf_function;
{
+ sparc_output_scratch_registers (file);
+
/* Need to use actual_fsize, since we are also allocating
space for our callee (and our own register save area). */
actual_fsize = compute_frame_size (size, leaf_function);
if (actual_fsize == 0)
/* do nothing. */ ;
- else if (! leaf_function && ! TARGET_BROKEN_SAVERESTORE)
+ else if (! leaf_function)
{
if (actual_fsize <= 4096)
fprintf (file, "\tsave\t%%sp, -%d, %%sp\n", actual_fsize);
fprintf (file, "\tsave\t%%sp, %%g1, %%sp\n");
}
}
- else if (! leaf_function && TARGET_BROKEN_SAVERESTORE)
- {
- /* We assume the environment will properly handle or otherwise avoid
- trouble associated with an interrupt occurring after the `save' or
- trap occurring during it. */
- fprintf (file, "\tsave\n");
-
- if (actual_fsize <= 4096)
- fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize);
- else if (actual_fsize <= 8192)
- {
- fprintf (file, "\tadd\t%%fp, -4096, %%sp\n");
- fprintf (file, "\tadd\t%%fp, -%d, %%sp\n", actual_fsize - 4096);
- }
- else
- {
- build_big_number (file, -actual_fsize, "%g1");
- fprintf (file, "\tadd\t%%fp, %%g1, %%sp\n");
- }
- }
else /* leaf function */
{
if (actual_fsize <= 4096)
if (num_gfregs)
{
int offset, real_offset, n_regs;
- char *base;
+ const char *base;
real_offset = -apparent_fsize;
offset = -apparent_fsize + frame_base_offset;
}
}
+/* Output code to restore any call saved registers. */
+
+static void
+output_restore_regs (file, leaf_function)
+ FILE *file;
+ int leaf_function;
+{
+ int offset, n_regs;
+ const char *base;
+
+ offset = -apparent_fsize + frame_base_offset;
+ if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
+ {
+ build_big_number (file, offset, "%g1");
+ fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
+ base = "%g1";
+ offset = 0;
+ }
+ else
+ {
+ base = frame_base_name;
+ }
+
+ n_regs = 0;
+ if (TARGET_EPILOGUE && ! leaf_function)
+ /* ??? Originally saved regs 0-15 here. */
+ n_regs = restore_regs (file, 0, 8, base, offset, 0);
+ else if (leaf_function)
+ /* ??? Originally saved regs 0-31 here. */
+ n_regs = restore_regs (file, 0, 8, base, offset, 0);
+ if (TARGET_EPILOGUE)
+ restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
+}
+
/* Output code for the function epilogue. */
void
int size ATTRIBUTE_UNUSED;
int leaf_function;
{
- char *ret;
+ const char *ret;
if (leaf_label)
{
#endif
else if (current_function_epilogue_delay_list == 0)
- {
+ {
/* If code does not drop into the epilogue, we need
do nothing except output pending case vectors. */
rtx insn = get_last_insn ();
goto output_vectors;
}
- /* Restore any call saved registers. */
if (num_gfregs)
- {
- int offset, n_regs;
- char *base;
-
- offset = -apparent_fsize + frame_base_offset;
- if (offset < -4096 || offset + num_gfregs * 4 > 4096 - 8 /*double*/)
- {
- build_big_number (file, offset, "%g1");
- fprintf (file, "\tadd\t%s, %%g1, %%g1\n", frame_base_name);
- base = "%g1";
- offset = 0;
- }
- else
- {
- base = frame_base_name;
- }
-
- n_regs = 0;
- if (TARGET_EPILOGUE && ! leaf_function)
- /* ??? Originally saved regs 0-15 here. */
- n_regs = restore_regs (file, 0, 8, base, offset, 0);
- else if (leaf_function)
- /* ??? Originally saved regs 0-31 here. */
- n_regs = restore_regs (file, 0, 8, base, offset, 0);
- if (TARGET_EPILOGUE)
- restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
- }
+ output_restore_regs (file, leaf_function);
/* Work out how to skip the caller's unimp instruction if required. */
if (leaf_function)
/* If we wound up with things in our delay slot, flush them here. */
if (current_function_epilogue_delay_list)
{
- rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
- get_last_insn ());
- PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2,
- PATTERN (XEXP (current_function_epilogue_delay_list, 0)),
- PATTERN (insn)));
- final_scan_insn (insn, file, 1, 0, 1);
+ rtx delay = PATTERN (XEXP (current_function_epilogue_delay_list, 0));
+
+ if (TARGET_V9 && ! epilogue_renumber (&delay, 1))
+ {
+ epilogue_renumber (&delay, 0);
+ fputs (SKIP_CALLERS_UNIMP_P
+ ? "\treturn\t%i7+12\n"
+ : "\treturn\t%i7+8\n", file);
+ final_scan_insn (XEXP (current_function_epilogue_delay_list, 0), file, 1, 0, 0);
+ }
+ else
+ {
+ rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
+ get_last_insn ());
+ rtx src;
+
+ if (GET_CODE (delay) != SET)
+ abort();
+
+ src = SET_SRC (delay);
+ if (GET_CODE (src) == ASHIFT)
+ {
+ if (XEXP (src, 1) != const1_rtx)
+ abort();
+ SET_SRC (delay) = gen_rtx_PLUS (GET_MODE (src), XEXP (src, 0),
+ XEXP (src, 0));
+ }
+
+ PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, delay, PATTERN (insn)));
+ final_scan_insn (insn, file, 1, 0, 1);
+ }
}
else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
fputs ("\treturn\t%i7+8\n\tnop\n", file);
output_vectors:
sparc_output_deferred_case_vectors ();
}
+
+/* Output a sibling call. */
+
+const char *
+output_sibcall (insn, call_operand)
+ rtx insn, call_operand;
+{
+ int leaf_regs = current_function_uses_only_leaf_regs;
+ rtx operands[3];
+ int delay_slot = dbr_sequence_length () > 0;
+
+ if (num_gfregs)
+ {
+ /* Call to restore global regs might clobber
+ the delay slot. Instead of checking for this
+ output the delay slot now. */
+ if (delay_slot)
+ {
+ rtx delay = NEXT_INSN (insn);
+
+ if (! delay)
+ abort ();
+
+ final_scan_insn (delay, asm_out_file, 1, 0, 1);
+ PATTERN (delay) = gen_blockage ();
+ INSN_CODE (delay) = -1;
+ delay_slot = 0;
+ }
+ output_restore_regs (asm_out_file, leaf_regs);
+ }
+
+ operands[0] = call_operand;
+
+ if (leaf_regs)
+ {
+#ifdef HAVE_AS_RELAX_OPTION
+      /* If as and ld are relaxing tail call insns into branch always, use
+	 or %o7,%g0,X; call Y; or X,%g0,%o7 always, so that it can be
+	 optimized.  With sethi/jmpl, neither as nor ld has an easy way to
+	 find out whether somebody branches between the sethi and jmpl.  */
+ int spare_slot = 0;
+#else
+ int spare_slot = ((TARGET_ARCH32 || TARGET_CM_MEDLOW) && ! flag_pic);
+#endif
+ int size = 0;
+
+ if ((actual_fsize || ! spare_slot) && delay_slot)
+ {
+ rtx delay = NEXT_INSN (insn);
+
+ if (! delay)
+ abort ();
+
+ final_scan_insn (delay, asm_out_file, 1, 0, 1);
+ PATTERN (delay) = gen_blockage ();
+ INSN_CODE (delay) = -1;
+ delay_slot = 0;
+ }
+ if (actual_fsize)
+ {
+ if (actual_fsize <= 4096)
+ size = actual_fsize;
+ else if (actual_fsize <= 8192)
+ {
+ fputs ("\tsub\t%sp, -4096, %sp\n", asm_out_file);
+ size = actual_fsize - 4096;
+ }
+ else if ((actual_fsize & 0x3ff) == 0)
+ fprintf (asm_out_file,
+ "\tsethi\t%%hi(%d), %%g1\n\tadd\t%%sp, %%g1, %%sp\n",
+ actual_fsize);
+ else
+ {
+ fprintf (asm_out_file,
+ "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n",
+ actual_fsize, actual_fsize);
+	      fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
+ }
+ }
+ if (spare_slot)
+ {
+ output_asm_insn ("sethi\t%%hi(%a0), %%g1", operands);
+ output_asm_insn ("jmpl\t%%g1 + %%lo(%a0), %%g0", operands);
+ if (size)
+ fprintf (asm_out_file, "\t sub\t%%sp, -%d, %%sp\n", size);
+ else if (! delay_slot)
+ fputs ("\t nop\n", asm_out_file);
+ }
+ else
+ {
+ if (size)
+ fprintf (asm_out_file, "\tsub\t%%sp, -%d, %%sp\n", size);
+ /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
+ it into branch if possible. */
+ output_asm_insn ("or\t%%o7, %%g0, %%g1", operands);
+ output_asm_insn ("call\t%a0, 0", operands);
+ output_asm_insn (" or\t%%g1, %%g0, %%o7", operands);
+ }
+ return "";
+ }
+
+ output_asm_insn ("call\t%a0, 0", operands);
+ if (delay_slot)
+ {
+ rtx delay = NEXT_INSN (insn), pat;
+
+ if (! delay)
+ abort ();
+
+ pat = PATTERN (delay);
+ if (GET_CODE (pat) != SET)
+ abort ();
+
+ operands[0] = SET_DEST (pat);
+ pat = SET_SRC (pat);
+ switch (GET_CODE (pat))
+ {
+ case PLUS:
+ operands[1] = XEXP (pat, 0);
+ operands[2] = XEXP (pat, 1);
+ output_asm_insn (" restore %r1, %2, %Y0", operands);
+ break;
+ case LO_SUM:
+ operands[1] = XEXP (pat, 0);
+ operands[2] = XEXP (pat, 1);
+ output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
+ break;
+ case ASHIFT:
+ operands[1] = XEXP (pat, 0);
+ output_asm_insn (" restore %r1, %r1, %Y0", operands);
+ break;
+ default:
+ operands[1] = pat;
+ output_asm_insn (" restore %%g0, %1, %Y0", operands);
+ break;
+ }
+ PATTERN (delay) = gen_blockage ();
+ INSN_CODE (delay) = -1;
+ }
+ else
+ fputs ("\t restore\n", asm_out_file);
+ return "";
+}
\f
/* Functions for handling argument passing.
init_cumulative_args (cum, fntype, libname, indirect)
CUMULATIVE_ARGS *cum;
tree fntype;
- tree libname ATTRIBUTE_UNUSED;
+ rtx libname ATTRIBUTE_UNUSED;
int indirect ATTRIBUTE_UNUSED;
{
cum->words = 0;
case HImode : case CHImode :
case SImode : case CSImode :
case DImode : case CDImode :
+ case TImode : case CTImode :
if (slotno >= SPARC_INT_ARG_MAX)
return -1;
regno = regbase + slotno;
{
rtx ret;
int slotno, named, regbase;
- int nregs, intoffset;
+ unsigned int nregs;
+ int intoffset;
};
static void function_arg_record_value_3
- PROTO((int, struct function_arg_record_value_parms *));
+ PARAMS ((HOST_WIDE_INT, struct function_arg_record_value_parms *));
static void function_arg_record_value_2
- PROTO((tree, int, struct function_arg_record_value_parms *));
+ PARAMS ((tree, HOST_WIDE_INT,
+ struct function_arg_record_value_parms *));
+static void function_arg_record_value_1
+ PARAMS ((tree, HOST_WIDE_INT,
+ struct function_arg_record_value_parms *));
static rtx function_arg_record_value
- PROTO((tree, enum machine_mode, int, int, int));
+ PARAMS ((tree, enum machine_mode, int, int, int));
static void
function_arg_record_value_1 (type, startbitpos, parms)
tree type;
- int startbitpos;
+ HOST_WIDE_INT startbitpos;
struct function_arg_record_value_parms *parms;
{
tree field;
{
if (TREE_CODE (field) == FIELD_DECL)
{
- int bitpos = startbitpos;
- if (DECL_FIELD_BITPOS (field))
- bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
+ HOST_WIDE_INT bitpos = startbitpos;
+
+ if (DECL_SIZE (field) != 0
+ && host_integerp (bit_position (field), 1))
+ bitpos += int_bit_position (field);
+
/* ??? FIXME: else assume zero offset. */
if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
- {
- function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
- }
+ function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
&& TARGET_FPU
&& ! packed_p
static void
function_arg_record_value_3 (bitpos, parms)
- int bitpos;
+ HOST_WIDE_INT bitpos;
struct function_arg_record_value_parms *parms;
{
enum machine_mode mode;
- int regno, this_slotno, intslots, intoffset;
+ unsigned int regno;
+ int this_slotno, intslots, intoffset;
rtx reg;
if (parms->intoffset == -1)
return;
+
intoffset = parms->intoffset;
parms->intoffset = -1;
at the moment but may wish to revisit. */
if (intoffset % BITS_PER_WORD != 0)
- {
- mode = mode_for_size (BITS_PER_WORD - intoffset%BITS_PER_WORD,
- MODE_INT, 0);
- }
+ mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
+ MODE_INT, 0);
else
mode = word_mode;
static void
function_arg_record_value_2 (type, startbitpos, parms)
tree type;
- int startbitpos;
+ HOST_WIDE_INT startbitpos;
struct function_arg_record_value_parms *parms;
{
tree field;
{
if (TREE_CODE (field) == FIELD_DECL)
{
- int bitpos = startbitpos;
- if (DECL_FIELD_BITPOS (field))
- bitpos += TREE_INT_CST_LOW (DECL_FIELD_BITPOS (field));
+ HOST_WIDE_INT bitpos = startbitpos;
+
+ if (DECL_SIZE (field) != 0
+ && host_integerp (bit_position (field), 1))
+ bitpos += int_bit_position (field);
+
/* ??? FIXME: else assume zero offset. */
if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
- {
- function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
- }
+ function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
&& TARGET_FPU
&& ! packed_p
{
HOST_WIDE_INT typesize = int_size_in_bytes (type);
struct function_arg_record_value_parms parms;
- int nregs;
+ unsigned int nregs;
parms.ret = NULL_RTX;
parms.slotno = slotno;
mode = mode_for_size (bytes * BITS_PER_UNIT, MODE_INT, 0);
}
}
+
+ if (TARGET_ARCH64
+ && GET_MODE_CLASS (mode) == MODE_INT
+ && GET_MODE_SIZE (mode) < UNITS_PER_WORD
+ && type && TREE_CODE (type) != UNION_TYPE)
+ mode = DImode;
if (incoming_p)
regno = BASE_RETURN_VALUE_REG (mode);
return gen_rtx_REG (mode, regno);
}
-/* Do what is necessary for `va_start'. The argument is ignored.
-
- We look at the current function to determine if stdarg or varargs
- is used and return the address of the first unnamed parameter. */
+/* Do what is necessary for `va_start'. We look at the current function
+ to determine if stdarg or varargs is used and return the address of
+ the first unnamed parameter. */
rtx
-sparc_builtin_saveregs (arglist)
- tree arglist ATTRIBUTE_UNUSED;
+sparc_builtin_saveregs ()
{
int first_reg = current_function_args_info.words;
rtx address;
GEN_INT (STACK_POINTER_OFFSET
+ UNITS_PER_WORD * first_reg));
- if (flag_check_memory_usage
+ if (current_function_check_memory_usage
&& first_reg < NPARM_REGS (word_mode))
emit_library_call (chkr_set_right_libfunc, 1, VOIDmode, 3,
address, ptr_mode,
return address;
}
+
+/* Implement `va_start' for varargs and stdarg. */
+
+void
+sparc_va_start (stdarg_p, valist, nextarg)
+ int stdarg_p ATTRIBUTE_UNUSED;
+ tree valist;
+ rtx nextarg;
+{
+ nextarg = expand_builtin_saveregs ();
+ std_expand_builtin_va_start (1, valist, nextarg);
+}
+
+/* Implement `va_arg'. */
+
+rtx
+sparc_va_arg (valist, type)
+ tree valist, type;
+{
+ HOST_WIDE_INT size, rsize, align;
+ tree addr, incr;
+ rtx addr_rtx;
+ int indirect = 0;
+
+ /* Round up sizeof(type) to a word. */
+ size = int_size_in_bytes (type);
+ rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
+ align = 0;
+
+ if (TARGET_ARCH64)
+ {
+ if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
+ align = 2 * UNITS_PER_WORD;
+
+ if (AGGREGATE_TYPE_P (type))
+ {
+ if (size > 16)
+ {
+ indirect = 1;
+ size = rsize = UNITS_PER_WORD;
+ }
+ else
+ size = rsize;
+ }
+ }
+ else
+ {
+ if (AGGREGATE_TYPE_P (type)
+ || TYPE_MODE (type) == TFmode
+ || TYPE_MODE (type) == TCmode)
+ {
+ indirect = 1;
+ size = rsize = UNITS_PER_WORD;
+ }
+ }
+
+ incr = valist;
+ if (align)
+ {
+ incr = fold (build (PLUS_EXPR, ptr_type_node, incr,
+ build_int_2 (align - 1, 0)));
+ incr = fold (build (BIT_AND_EXPR, ptr_type_node, incr,
+ build_int_2 (-align, -1)));
+ }
+
+ addr = incr = save_expr (incr);
+ if (BYTES_BIG_ENDIAN && size < rsize)
+ {
+ addr = fold (build (PLUS_EXPR, ptr_type_node, incr,
+ build_int_2 (rsize - size, 0)));
+ }
+ incr = fold (build (PLUS_EXPR, ptr_type_node, incr,
+ build_int_2 (rsize, 0)));
+
+ incr = build (MODIFY_EXPR, ptr_type_node, valist, incr);
+ TREE_SIDE_EFFECTS (incr) = 1;
+ expand_expr (incr, const0_rtx, VOIDmode, EXPAND_NORMAL);
+
+ addr_rtx = expand_expr (addr, NULL, Pmode, EXPAND_NORMAL);
+
+ /* If the address isn't aligned properly for the type,
+ we may need to copy to a temporary.
+ FIXME: This is inefficient. Usually we can do this
+ in registers. */
+ if (align == 0
+ && TYPE_ALIGN (type) > BITS_PER_WORD
+ && !indirect)
+ {
+ /* FIXME: We really need to specify that the temporary is live
+ for the whole function because expand_builtin_va_arg wants
+ the alias set to be get_varargs_alias_set (), but in this
+ case the alias set is that for TYPE and if the memory gets
+ reused it will be reused with alias set TYPE. */
+ rtx tmp = assign_temp (type, 0, 1, 0);
+ rtx dest_addr;
+
+ addr_rtx = force_reg (Pmode, addr_rtx);
+ addr_rtx = gen_rtx_MEM (BLKmode, addr_rtx);
+ MEM_ALIAS_SET (addr_rtx) = get_varargs_alias_set ();
+ tmp = shallow_copy_rtx (tmp);
+ PUT_MODE (tmp, BLKmode);
+ MEM_ALIAS_SET (tmp) = 0;
+
+ dest_addr = emit_block_move (tmp, addr_rtx, GEN_INT (rsize),
+ BITS_PER_WORD);
+ if (dest_addr != NULL_RTX)
+ addr_rtx = dest_addr;
+ else
+ addr_rtx = XCEXP (tmp, 0, MEM);
+ }
+
+ if (indirect)
+ {
+ addr_rtx = force_reg (Pmode, addr_rtx);
+ addr_rtx = gen_rtx_MEM (Pmode, addr_rtx);
+ MEM_ALIAS_SET (addr_rtx) = get_varargs_alias_set ();
+ }
+
+ return addr_rtx;
+}
\f
/* Return the string to output a conditional branch to LABEL, which is
the operand number of the label. OP is the conditional expression.
static char v9_xcc_labelno[] = "%%xcc, %lX";
static char v9_fcc_labelno[] = "%%fccX, %lY";
char *labelno;
+ const char *branch;
int labeloff, spaces = 8;
- /* ??? !v9: FP branches cannot be preceded by another floating point insn.
- Because there is currently no concept of pre-delay slots, we can fix
- this only by always emitting a nop before a floating point branch. */
-
- if ((mode == CCFPmode || mode == CCFPEmode) && ! TARGET_V9)
- strcpy (string, "nop\n\t");
- else
- string[0] = '\0';
-
- /* If not floating-point or if EQ or NE, we can just reverse the code. */
- if (reversed
- && ((mode != CCFPmode && mode != CCFPEmode) || code == EQ || code == NE))
- code = reverse_condition (code), reversed = 0;
-
- /* Start by writing the branch condition. */
- switch (code)
+ if (reversed)
{
- case NE:
+ /* Reversal of FP compares takes care -- an ordered compare
+ becomes an unordered compare and vice versa. */
if (mode == CCFPmode || mode == CCFPEmode)
- {
- strcat (string, "fbne");
- spaces -= 4;
- }
+ code = reverse_condition_maybe_unordered (code);
else
- {
- strcpy (string, "bne");
- spaces -= 3;
- }
- break;
+ code = reverse_condition (code);
+ }
- case EQ:
- if (mode == CCFPmode || mode == CCFPEmode)
- {
- strcat (string, "fbe");
- spaces -= 3;
- }
- else
+ /* Start by writing the branch condition. */
+ if (mode == CCFPmode || mode == CCFPEmode)
+ {
+ switch (code)
{
- strcpy (string, "be");
- spaces -= 2;
- }
- break;
+ case NE:
+ branch = "fbne";
+ break;
+ case EQ:
+ branch = "fbe";
+ break;
+ case GE:
+ branch = "fbge";
+ break;
+ case GT:
+ branch = "fbg";
+ break;
+ case LE:
+ branch = "fble";
+ break;
+ case LT:
+ branch = "fbl";
+ break;
+ case UNORDERED:
+ branch = "fbu";
+ break;
+ case ORDERED:
+ branch = "fbo";
+ break;
+ case UNGT:
+ branch = "fbug";
+ break;
+ case UNLT:
+ branch = "fbul";
+ break;
+ case UNEQ:
+ branch = "fbue";
+ break;
+ case UNGE:
+ branch = "fbuge";
+ break;
+ case UNLE:
+ branch = "fbule";
+ break;
+ case LTGT:
+ branch = "fblg";
+ break;
- case GE:
- if (mode == CCFPmode || mode == CCFPEmode)
- {
- if (reversed)
- strcat (string, "fbul");
- else
- strcat (string, "fbge");
- spaces -= 4;
- }
- else if (mode == CC_NOOVmode)
- {
- strcpy (string, "bpos");
- spaces -= 4;
- }
- else
- {
- strcpy (string, "bge");
- spaces -= 3;
+ default:
+ abort ();
}
- break;
- case GT:
- if (mode == CCFPmode || mode == CCFPEmode)
+ /* ??? !v9: FP branches cannot be preceded by another floating point
+ insn. Because there is currently no concept of pre-delay slots,
+ we can fix this only by always emitting a nop before a floating
+ point branch. */
+
+ string[0] = '\0';
+ if (! TARGET_V9)
+ strcpy (string, "nop\n\t");
+ strcat (string, branch);
+ }
+ else
+ {
+ switch (code)
{
- if (reversed)
- {
- strcat (string, "fbule");
- spaces -= 5;
- }
+ case NE:
+ branch = "bne";
+ break;
+ case EQ:
+ branch = "be";
+ break;
+ case GE:
+ if (mode == CC_NOOVmode)
+ branch = "bpos";
else
- {
- strcat (string, "fbg");
- spaces -= 3;
- }
- }
- else
- {
- strcpy (string, "bg");
- spaces -= 2;
- }
- break;
-
- case LE:
- if (mode == CCFPmode || mode == CCFPEmode)
- {
- if (reversed)
- strcat (string, "fbug");
+ branch = "bge";
+ break;
+ case GT:
+ branch = "bg";
+ break;
+ case LE:
+ branch = "ble";
+ break;
+ case LT:
+ if (mode == CC_NOOVmode)
+ branch = "bneg";
else
- strcat (string, "fble");
- spaces -= 4;
- }
- else
- {
- strcpy (string, "ble");
- spaces -= 3;
- }
- break;
+ branch = "bl";
+ break;
+ case GEU:
+ branch = "bgeu";
+ break;
+ case GTU:
+ branch = "bgu";
+ break;
+ case LEU:
+ branch = "bleu";
+ break;
+ case LTU:
+ branch = "blu";
+ break;
- case LT:
- if (mode == CCFPmode || mode == CCFPEmode)
- {
- if (reversed)
- {
- strcat (string, "fbuge");
- spaces -= 5;
- }
- else
- {
- strcat (string, "fbl");
- spaces -= 3;
- }
- }
- else if (mode == CC_NOOVmode)
- {
- strcpy (string, "bneg");
- spaces -= 4;
- }
- else
- {
- strcpy (string, "bl");
- spaces -= 2;
+ default:
+ abort ();
}
- break;
-
- case GEU:
- strcpy (string, "bgeu");
- spaces -= 4;
- break;
-
- case GTU:
- strcpy (string, "bgu");
- spaces -= 3;
- break;
-
- case LEU:
- strcpy (string, "bleu");
- spaces -= 4;
- break;
-
- case LTU:
- strcpy (string, "blu");
- spaces -= 3;
- break;
-
- default:
- abort ();
+ strcpy (string, branch);
}
+ spaces -= strlen (branch);
/* Now add the annulling, the label, and a possible noop. */
if (annul)
return string;
}
+/* Emit a library call comparison between floating point X and Y.
+ COMPARISON is the rtl operator to compare with (EQ, NE, GT, etc.).
+ TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
+ values as arguments instead of the TFmode registers themselves,
+ that's why we cannot call emit_float_lib_cmp. */
+void
+sparc_emit_float_lib_cmp (x, y, comparison)
+ rtx x, y;
+ enum rtx_code comparison;
+{
+ const char *qpfunc;
+ rtx slot0, slot1, result, tem, tem2;
+ enum machine_mode mode;
+
+ switch (comparison)
+ {
+ case EQ:
+ qpfunc = (TARGET_ARCH64) ? "_Qp_feq" : "_Q_feq";
+ break;
+
+ case NE:
+ qpfunc = (TARGET_ARCH64) ? "_Qp_fne" : "_Q_fne";
+ break;
+
+ case GT:
+ qpfunc = (TARGET_ARCH64) ? "_Qp_fgt" : "_Q_fgt";
+ break;
+
+ case GE:
+ qpfunc = (TARGET_ARCH64) ? "_Qp_fge" : "_Q_fge";
+ break;
+
+ case LT:
+ qpfunc = (TARGET_ARCH64) ? "_Qp_flt" : "_Q_flt";
+ break;
+
+ case LE:
+ qpfunc = (TARGET_ARCH64) ? "_Qp_fle" : "_Q_fle";
+ break;
+
+ case ORDERED:
+ case UNORDERED:
+ case UNGT:
+ case UNLT:
+ case UNEQ:
+ case UNGE:
+ case UNLE:
+ case LTGT:
+ qpfunc = (TARGET_ARCH64) ? "_Qp_cmp" : "_Q_cmp";
+ break;
+
+ default:
+ abort();
+ break;
+ }
+
+ if (TARGET_ARCH64)
+ {
+ if (GET_CODE (x) != MEM)
+ {
+ slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
+ emit_insn (gen_rtx_SET (VOIDmode, slot0, x));
+ }
+ else
+ slot0 = x;
+
+ if (GET_CODE (y) != MEM)
+ {
+ slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode), 0);
+ emit_insn (gen_rtx_SET (VOIDmode, slot1, y));
+ }
+ else
+ slot1 = y;
+
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), 1,
+ DImode, 2,
+ XEXP (slot0, 0), Pmode,
+ XEXP (slot1, 0), Pmode);
+
+ mode = DImode;
+ }
+ else
+ {
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), 1,
+ SImode, 2,
+ x, TFmode, y, TFmode);
+
+ mode = SImode;
+ }
+
+
+ /* Immediately move the result of the libcall into a pseudo
+ register so reload doesn't clobber the value if it needs
+ the return register for a spill reg. */
+ result = gen_reg_rtx (mode);
+ emit_move_insn (result, hard_libcall_value (mode));
+
+ switch (comparison)
+ {
+ default:
+ emit_cmp_insn (result, const0_rtx, NE,
+ NULL_RTX, mode, 0, 0);
+ break;
+ case ORDERED:
+ case UNORDERED:
+ emit_cmp_insn (result, GEN_INT(3),
+ (comparison == UNORDERED) ? EQ : NE,
+ NULL_RTX, mode, 0, 0);
+ break;
+ case UNGT:
+ case UNGE:
+ emit_cmp_insn (result, const1_rtx,
+ (comparison == UNGT) ? GT : NE,
+ NULL_RTX, mode, 0, 0);
+ break;
+ case UNLE:
+ emit_cmp_insn (result, const2_rtx, NE,
+ NULL_RTX, mode, 0, 0);
+ break;
+ case UNLT:
+ tem = gen_reg_rtx (mode);
+ if (TARGET_ARCH32)
+ emit_insn (gen_andsi3 (tem, result, const1_rtx));
+ else
+ emit_insn (gen_anddi3 (tem, result, const1_rtx));
+ emit_cmp_insn (tem, const0_rtx, NE,
+ NULL_RTX, mode, 0, 0);
+ break;
+ case UNEQ:
+ case LTGT:
+ tem = gen_reg_rtx (mode);
+ if (TARGET_ARCH32)
+ emit_insn (gen_addsi3 (tem, result, const1_rtx));
+ else
+ emit_insn (gen_adddi3 (tem, result, const1_rtx));
+ tem2 = gen_reg_rtx (mode);
+ if (TARGET_ARCH32)
+ emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
+ else
+ emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
+ emit_cmp_insn (tem2, const0_rtx,
+ (comparison == UNEQ) ? EQ : NE,
+ NULL_RTX, mode, 0, 0);
+ break;
+ }
+}
+
/* Return the string to output a conditional branch to LABEL, testing
register REG. LABEL is the operand number of the label; REG is the
operand number of the reg. OP is the conditional expression. The mode
return string;
}
-/* Renumber registers in delay slot. Replace registers instead of
- renumbering because they may be shared.
+/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
+ Such instructions cannot be used in the delay slot of return insn on v9.
+ If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
+ */
- This does not handle instructions other than move. */
-
-static void
-epilogue_renumber (where)
- rtx *where;
+static int
+epilogue_renumber (where, test)
+     register rtx *where;
+     int test;
 {
-  rtx x = *where;
-  enum rtx_code code = GET_CODE (x);
+  register const char *fmt;
+  register int i;
+  register enum rtx_code code;
+
+  /* An absent operand has no registers to check or rename.  */
+  if (*where == 0)
+    return 0;
+
+  code = GET_CODE (*where);
   switch (code)
     {
-    case MEM:
-      *where = x = copy_rtx (x);
-      epilogue_renumber (&XEXP (x, 0));
-      return;
-
     case REG:
-      {
-	int regno = REGNO (x);
-	if (regno > 8 && regno < 24)
-	  abort ();
-	if (regno >= 24 && regno < 32)
-	  *where = gen_rtx_REG (GET_MODE (x), regno - 16);
-	return;
-      }
+      /* %o[0-7] and %l[0-7] are dead once the register window is popped,
+	 so an insn using them cannot go in the return delay slot.  */
+      if (REGNO (*where) >= 8 && REGNO (*where) < 24)	/* oX or lX */
+	return 1;
+      if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
+	*where = gen_rtx (REG, GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
+      /* Fall through: a REG (renamed or not) has no sub-expressions.  */
+    case SCRATCH:
+    case CC0:
+    case PC:
     case CONST_INT:
     case CONST_DOUBLE:
-    case CONST:
-    case SYMBOL_REF:
-    case LABEL_REF:
-      return;
-
-    case IOR:
-    case AND:
-    case XOR:
-    case PLUS:
-    case MINUS:
-      epilogue_renumber (&XEXP (x, 1));
-    case NEG:
-    case NOT:
-      epilogue_renumber (&XEXP (x, 0));
-      return;
+      return 0;
     default:
-      debug_rtx (*where);
-      abort ();
+      break;
+    }
+
+  /* For every other rtx code, walk all sub-rtxes generically using the
+     format string, so arbitrary delay-slot insns are handled.  */
+  fmt = GET_RTX_FORMAT (code);
+
+  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'E')
+	{
+	  register int j;
+	  for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
+	    if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
+	      return 1;
+	}
+      else if (fmt[i] == 'e'
+	       && epilogue_renumber (&(XEXP (*where, i)), test))
+	return 1;
     }
+  return 0;
 }
/* Output assembler code to return from a function. */
-char *
+const char *
output_return (operands)
rtx *operands;
{
operands[0] = leaf_label;
return "b%* %l0%(";
}
- else if (leaf_function)
+ else if (current_function_uses_only_leaf_regs)
{
/* No delay slot in a leaf function. */
if (delay)
else
{
if ((actual_fsize & 0x3ff) != 0)
- return "sethi %%hi(%a0),%%g1\n\tor %%g1,%%lo(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
+ return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tretl\n\tadd\t%%sp, %%g1, %%sp";
else
- return "sethi %%hi(%a0),%%g1\n\tretl\n\tadd %%sp,%%g1,%%sp";
+ return "sethi\t%%hi(%a0), %%g1\n\tretl\n\tadd\t%%sp, %%g1, %%sp";
}
}
else if (TARGET_V9)
{
if (delay)
{
- epilogue_renumber (&SET_DEST (PATTERN (delay)));
- epilogue_renumber (&SET_SRC (PATTERN (delay)));
+ epilogue_renumber (&SET_DEST (PATTERN (delay)), 0);
+ epilogue_renumber (&SET_SRC (PATTERN (delay)), 0);
}
if (SKIP_CALLERS_UNIMP_P)
return "return\t%%i7+12%#";
if (regs_ever_live[15] != last_order_nonleaf)
{
last_order_nonleaf = !last_order_nonleaf;
- bcopy ((char *) reg_alloc_orders[last_order_nonleaf],
- (char *) reg_alloc_order, FIRST_PSEUDO_REGISTER * sizeof (int));
+ memcpy ((char *) reg_alloc_order,
+ (char *) reg_alloc_orders[last_order_nonleaf],
+ FIRST_PSEUDO_REGISTER * sizeof (int));
}
}
\f
case 'c' :
case 'C':
{
- enum rtx_code rc = (code == 'c'
- ? reverse_condition (GET_CODE (x))
- : GET_CODE (x));
+ enum rtx_code rc = GET_CODE (x);
+
+ if (code == 'c')
+ {
+ enum machine_mode mode = GET_MODE (XEXP (x, 0));
+ if (mode == CCFPmode || mode == CCFPEmode)
+ rc = reverse_condition_maybe_unordered (GET_CODE (x));
+ else
+ rc = reverse_condition (GET_CODE (x));
+ }
switch (rc)
{
case NE: fputs ("ne", file); break;
case GTU: fputs ("gu", file); break;
case LEU: fputs ("leu", file); break;
case LTU: fputs ("lu", file); break;
+ case LTGT: fputs ("lg", file); break;
+ case UNORDERED: fputs ("u", file); break;
+ case ORDERED: fputs ("o", file); break;
+ case UNLT: fputs ("ul", file); break;
+ case UNLE: fputs ("ule", file); break;
+ case UNGT: fputs ("ug", file); break;
+ case UNGE: fputs ("uge", file); break;
+ case UNEQ: fputs ("ue", file); break;
default: output_operand_lossage (code == 'c'
? "Invalid %%c operand"
: "Invalid %%C operand");
{
fputc ('[', file);
/* Poor Sun assembler doesn't understand absolute addressing. */
- if (CONSTANT_P (XEXP (x, 0))
- && ! TARGET_LIVE_G0)
+ if (CONSTANT_P (XEXP (x, 0)))
fputs ("%g0+", file);
output_address (XEXP (x, 0));
fputc (']', file);
|| GET_MODE_CLASS (GET_MODE (x)) == MODE_INT))
{
if (CONST_DOUBLE_HIGH (x) == 0)
- fprintf (file, "%u", CONST_DOUBLE_LOW (x));
+ fprintf (file, "%u", (unsigned int) CONST_DOUBLE_LOW (x));
else if (CONST_DOUBLE_HIGH (x) == -1
&& CONST_DOUBLE_LOW (x) < 0)
- fprintf (file, "%d", CONST_DOUBLE_LOW (x));
+ fprintf (file, "%d", (int) CONST_DOUBLE_LOW (x));
else
output_operand_lossage ("long long constant not a valid immediate operand");
}
SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
*/
+#ifdef TRANSFER_FROM_TRAMPOLINE
+ emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
+ 0, VOIDmode, 1, tramp, Pmode);
+#endif
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
expand_binop (SImode, ior_optab,
sparc64_initialize_trampoline (tramp, fnaddr, cxt)
rtx tramp, fnaddr, cxt;
{
+#ifdef TRANSFER_FROM_TRAMPOLINE
+ emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
+ 0, VOIDmode, 1, tramp, Pmode);
+#endif
+
/*
rd %pc, %g1
ldx [%g1+24], %g5
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
GEN_INT (0xca586018));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
- GEN_INT (0x81c04000));
+ GEN_INT (0x81c14000));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
GEN_INT (0xca586010));
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
- emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 20)), fnaddr);
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, tramp))));
+ emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
+ emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
+
if (sparc_cpu != PROCESSOR_ULTRASPARC)
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
+ emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
}
\f
/* Subroutines to support a flat (single) register window calling
/* This is the size of the 16 word reg save area, 1 word struct addr
area, and 4 word fp/alu register copy area. */
- extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
- var_size = size;
- /* Also include the size needed for the 6 parameter registers. */
- args_size = current_function_outgoing_args_size + 24;
- total_size = var_size + args_size + extra_size;
- gp_reg_size = 0;
- fp_reg_size = 0;
- gmask = 0;
- fmask = 0;
- reg_offset = 0;
+ extra_size = -STARTING_FRAME_OFFSET + FIRST_PARM_OFFSET(0);
+ var_size = size;
+ gp_reg_size = 0;
+ fp_reg_size = 0;
+ gmask = 0;
+ fmask = 0;
+ reg_offset = 0;
need_aligned_p = 0;
+ args_size = 0;
+ if (!leaf_function_p ())
+ {
+ /* Also include the size needed for the 6 parameter registers. */
+ args_size = current_function_outgoing_args_size + 24;
+ }
+ total_size = var_size + args_size;
+
/* Calculate space needed for gp registers. */
for (regno = 1; regno <= 31; regno++)
{
total_size += gp_reg_size + fp_reg_size;
}
- /* ??? This looks a little suspicious. Clarify. */
- if (total_size == extra_size)
- total_size = extra_size = 0;
+ /* If we must allocate a stack frame at all, we must also allocate
+ room for register window spillage, so as to be binary compatible
+ with libraries and operating systems that do not use -mflat. */
+ if (total_size > 0)
+ total_size += extra_size;
+ else
+ extra_size = 0;
total_size = SPARC_STACK_ALIGN (total_size);
sparc_flat_save_restore (file, base_reg, offset, gmask, fmask, word_op,
doubleword_op, base_offset)
FILE *file;
- char *base_reg;
+ const char *base_reg;
unsigned int offset;
unsigned long gmask;
unsigned long fmask;
- char *word_op;
- char *doubleword_op;
+ const char *word_op;
+ const char *doubleword_op;
unsigned long base_offset;
{
int regno;
FILE *file;
int size;
{
- char *sp_str = reg_names[STACK_POINTER_REGNUM];
+ const char *sp_str = reg_names[STACK_POINTER_REGNUM];
unsigned long gmask = current_frame_info.gmask;
+ sparc_output_scratch_registers (file);
+
/* This is only for the human reader. */
fprintf (file, "\t%s#PROLOGUE# 0\n", ASM_COMMENT_START);
fprintf (file, "\t%s# vars= %ld, regs= %d/%d, args= %d, extra= %ld\n",
if (size > 0)
{
unsigned int reg_offset = current_frame_info.reg_offset;
- char *fp_str = reg_names[FRAME_POINTER_REGNUM];
- char *t1_str = "%g1";
+ const char *fp_str = reg_names[FRAME_POINTER_REGNUM];
+ const char *t1_str = "%g1";
/* Things get a little tricky if local variables take up more than ~4096
bytes and outgoing arguments take up more than ~4096 bytes. When that
the gdb folk first. */
/* Is the entire register save area offsettable from %sp? */
- if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
+ if (reg_offset < 4096 - 64 * (unsigned) UNITS_PER_WORD)
{
if (size <= 4096)
{
{
unsigned int reg_offset = current_frame_info.reg_offset;
unsigned int size1;
- char *sp_str = reg_names[STACK_POINTER_REGNUM];
- char *fp_str = reg_names[FRAME_POINTER_REGNUM];
- char *t1_str = "%g1";
+ const char *sp_str = reg_names[STACK_POINTER_REGNUM];
+ const char *fp_str = reg_names[FRAME_POINTER_REGNUM];
+ const char *t1_str = "%g1";
/* In the reload sequence, we don't need to fill the load delay
slots for most of the loads, also see if we can fill the final
}
/* Is the entire register save area offsettable from %sp? */
- if (reg_offset < 4096 - 64 * UNITS_PER_WORD)
+ if (reg_offset < 4096 - 64 * (unsigned) UNITS_PER_WORD)
{
size1 = 0;
}
if (get_attr_length (trial) != 1)
return 0;
- /* If %g0 is live, there are lots of things we can't handle.
- Rather than trying to find them all now, let's punt and only
- optimize things as necessary. */
- if (TARGET_LIVE_G0)
- return 0;
-
if (! reg_mentioned_p (stack_pointer_rtx, pat)
&& ! reg_mentioned_p (frame_pointer_rtx, pat))
return 1;
/* Adjust the cost of a scheduling dependency. Return the new cost of
a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
-int
+static int
supersparc_adjust_cost (insn, link, dep_insn, cost)
rtx insn;
rtx link;
return cost;
}
-/* This describes the state of the UltraSPARC pipeline during
- instruction scheduling. */
-
-#define TMASK(__x) ((unsigned)1 << ((int)(__x)))
-#define UMASK(__x) ((unsigned)1 << ((int)(__x)))
+/* Adjust COST of the dependency LINK between DEP_INSN and INSN for the
+   HyperSPARC (and SPARClite86x) pipeline.  Called only from
+   sparc_adjust_cost.  */
+static int
+hypersparc_adjust_cost (insn, link, dep_insn, cost)
+     rtx insn;
+     rtx link;
+     rtx dep_insn;
+     int cost;
+{
+  enum attr_type insn_type, dep_type;
+  rtx pat = PATTERN(insn);
+  rtx dep_pat = PATTERN (dep_insn);
-enum ultra_code { NONE=0, /* no insn at all				*/
-		  IEU0,   /* shifts and conditional moves		*/
-		  IEU1,   /* condition code setting insns, calls+jumps	*/
-		  IEUN,   /* all other single cycle ieu insns		*/
-		  LSU,    /* loads and stores				*/
-		  CTI,    /* branches					*/
-		  FPM,    /* FPU pipeline 1, multiplies and divides	*/
-		  FPA,    /* FPU pipeline 2, all other operations	*/
-		  SINGLE, /* single issue instructions			*/
-		  NUM_ULTRA_CODES };
+  /* Unrecognizable insns get no special cost treatment.  */
+  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
+    return cost;
-static char *ultra_code_names[NUM_ULTRA_CODES] = {
-  "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
-  "FPM", "FPA", "SINGLE" };
+  insn_type = get_attr_type (insn);
+  dep_type = get_attr_type (dep_insn);
-struct ultrasparc_pipeline_state {
-  /* The insns in this group. */
-  rtx group[4];
+  switch (REG_NOTE_KIND (link))
+    {
+    case 0:
+      /* Data dependency; DEP_INSN writes a register that INSN reads some
+	 cycles later.  */
-  /* The code for each insn. */
-  enum ultra_code codes[4];
+      switch (insn_type)
+	{
+	case TYPE_STORE:
+	case TYPE_FPSTORE:
+	  /* Get the delay iff the address of the store is the dependence.  */
+	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+	    return cost;
-  /* Which insns in this group have been committed by the
-     scheduler.  This is how we determine how many more
-     can issue this cycle. */
-  char commit[4];
+	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
+	    return cost;
+	  return cost + 3;
-  /* How many insns in this group. */
-  char group_size;
+	case TYPE_LOAD:
+	case TYPE_SLOAD:
+	case TYPE_FPLOAD:
+	  /* If a load, then the dependence must be on the memory address.  If
+	     the addresses aren't equal, then it might be a false dependency.  */
+	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
+	    {
+	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
+		  || GET_CODE (SET_DEST (dep_pat)) != MEM
+		  || GET_CODE (SET_SRC (pat)) != MEM
+		  || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
+				    XEXP (SET_SRC (pat), 0)))
+		return cost + 2;
-  /* Mask of free slots still in this group. */
-  char free_slot_mask;
+	      return cost + 8;
+	    }
+	  break;
-  /* The slotter uses the following to determine what other
-     insn types can still make their way into this group. */
-  char contents [NUM_ULTRA_CODES];
-  char num_ieu_insns;
-};
+	case TYPE_BRANCH:
+	  /* Compare to branch latency is 0.  There is no benefit from
+	     separating compare and branch.  */
+	  if (dep_type == TYPE_COMPARE)
+	    return 0;
+	  /* Floating point compare to branch latency is less than
+	     compare to conditional move.  */
+	  if (dep_type == TYPE_FPCMP)
+	    return cost - 1;
+	  break;
+	default:
+	  break;
+	}
+	break;
+
+    case REG_DEP_ANTI:
+      /* Anti-dependencies only penalize the fpu unit.  */
+      if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
+	return 0;
+      break;
+
+    default:
+      break;
+    }
+
+  return cost;
+}
+
+/* Adjust COST of the dependency LINK between DEP_INSN and INSN for the
+   UltraSPARC pipeline.  Called only from sparc_adjust_cost.  */
+static int
+ultrasparc_adjust_cost (insn, link, dep_insn, cost)
+     rtx insn;
+     rtx link;
+     rtx dep_insn;
+     int cost;
+{
+  enum attr_type insn_type, dep_type;
+  rtx pat = PATTERN(insn);
+  rtx dep_pat = PATTERN (dep_insn);
+
+  /* Unrecognizable insns get no special cost treatment.  */
+  if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
+    return cost;
+
+  insn_type = get_attr_type (insn);
+  dep_type = get_attr_type (dep_insn);
+
+  /* Nothing issues in parallel with integer multiplies, so
+     mark as zero cost since the scheduler can not do anything
+     about it.  */
+  if (insn_type == TYPE_IMUL)
+    return 0;
+
+/* Long-latency FP ops (divide / square root) -- see the ANTI case below.  */
+#define SLOW_FP(dep_type) \
+(dep_type == TYPE_FPSQRTS || dep_type == TYPE_FPSQRTD || \
+ dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
+
+  switch (REG_NOTE_KIND (link))
+    {
+    case 0:
+      /* Data dependency; DEP_INSN writes a register that INSN reads some
+	 cycles later.  */
+
+      if (dep_type == TYPE_CMOVE)
+	{
+	  /* Instructions that read the result of conditional moves cannot
+	     be in the same group or the following group.  */
+	  return cost + 1;
+	}
+
+      switch (insn_type)
+	{
+	  /* UltraSPARC can dual issue a store and an instruction setting
+	     the value stored, except for divide and square root.  */
+	case TYPE_FPSTORE:
+	  if (! SLOW_FP (dep_type))
+	    return 0;
+	  return cost;
+
+	case TYPE_STORE:
+	  if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
+	    return cost;
+
+	  if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
+	    /* The dependency between the two instructions is on the data
+	       that is being stored.  Assume that the address of the store
+	       is not also dependent.  */
+	    return 0;
+	  return cost;
+
+	case TYPE_LOAD:
+	case TYPE_SLOAD:
+	case TYPE_FPLOAD:
+	  /* A load does not return data until at least 11 cycles after
+	     a store to the same location.  3 cycles are accounted for
+	     in the load latency; add the other 8 here.  */
+	  if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
+	    {
+	      /* If the addresses are not equal this may be a false
+		 dependency because pointer aliasing could not be
+		 determined.  Add only 2 cycles in that case.  2 is
+		 an arbitrary compromise between 8, which would cause
+		 the scheduler to generate worse code elsewhere to
+		 compensate for a dependency which might not really
+		 exist, and 0.  */
+	      if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
+		  || GET_CODE (SET_SRC (pat)) != MEM
+		  || GET_CODE (SET_DEST (dep_pat)) != MEM
+		  || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
+				    XEXP (SET_DEST (dep_pat), 0)))
+		return cost + 2;
+
+	      return cost + 8;
+	    }
+	  return cost;
+
+	case TYPE_BRANCH:
+	  /* Compare to branch latency is 0.  There is no benefit from
+	     separating compare and branch.  */
+	  if (dep_type == TYPE_COMPARE)
+	    return 0;
+	  /* Floating point compare to branch latency is less than
+	     compare to conditional move.  */
+	  if (dep_type == TYPE_FPCMP)
+	    return cost - 1;
+	  return cost;
+
+	case TYPE_FPCMOVE:
+	  /* FMOVR class instructions can not issue in the same cycle
+	     or the cycle after an instruction which writes any
+	     integer register.  Model this as cost 2 for dependent
+	     instructions.  */
+	  if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
+	       || dep_type == TYPE_BINARY)
+	      && cost < 2)
+	    return 2;
+	  /* Otherwise check as for integer conditional moves -- fall through.  */
+
+	case TYPE_CMOVE:
+	  /* Conditional moves involving integer registers wait until
+	     3 cycles after loads return data.  The interlock applies
+	     to all loads, not just dependent loads, but that is hard
+	     to model.  */
+	  if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
+	    return cost + 3;
+	  return cost;
+
+	default:
+	  break;
+	}
+      break;
+
+    case REG_DEP_ANTI:
+      /* Divide and square root lock destination registers for full latency.  */
+      if (! SLOW_FP (dep_type))
+	return 0;
+      break;
+
+    case REG_DEP_OUTPUT:
+      /* IEU and FPU instruction that have the same destination
+	 register cannot be grouped together.  */
+      return cost + 1;
+
+    default:
+      break;
+    }
+
+  /* Other costs not accounted for:
+     - Single precision floating point loads lock the other half of
+       the even/odd register pair.
+     - Several hazards associated with ldd/std are ignored because these
+       instructions are rarely generated for V9.
+     - The floating point pipeline can not have both a single and double
+       precision operation active at the same time.  Format conversions
+       and graphics instructions are given honorary double precision status.
+     - call and jmpl are always the first instruction in a group.  */
+
+  return cost;
+
+#undef SLOW_FP
+}
+
+/* Adjust the COST of the scheduling dependency LINK between INSN and DEP.
+   Dispatches to the per-CPU cost routine selected by -mcpu; for all other
+   processors COST is returned unchanged.  */
+int
+sparc_adjust_cost(insn, link, dep, cost)
+    rtx insn;
+    rtx link;
+    rtx dep;
+    int cost;
+{
+  switch (sparc_cpu)
+    {
+    case PROCESSOR_SUPERSPARC:
+      cost = supersparc_adjust_cost (insn, link, dep, cost);
+      break;
+    case PROCESSOR_HYPERSPARC:
+    case PROCESSOR_SPARCLITE86X:
+      cost = hypersparc_adjust_cost (insn, link, dep, cost);
+      break;
+    case PROCESSOR_ULTRASPARC:
+      cost = ultrasparc_adjust_cost (insn, link, dep, cost);
+      break;
+    default:
+      break;
+    }
+  return cost;
+}
+
+/* This describes the state of the UltraSPARC pipeline during
+ instruction scheduling. */
+
+#define TMASK(__x) ((unsigned)1 << ((int)(__x)))
+#define UMASK(__x) ((unsigned)1 << ((int)(__x)))
+
+enum ultra_code { NONE=0, /* no insn at all */
+ IEU0, /* shifts and conditional moves */
+ IEU1, /* condition code setting insns, calls+jumps */
+ IEUN, /* all other single cycle ieu insns */
+ LSU, /* loads and stores */
+ CTI, /* branches */
+ FPM, /* FPU pipeline 1, multiplies and divides */
+ FPA, /* FPU pipeline 2, all other operations */
+ SINGLE, /* single issue instructions */
+ NUM_ULTRA_CODES };
+
+static enum ultra_code ultra_code_from_mask PARAMS ((int));
+static void ultra_schedule_insn PARAMS ((rtx *, rtx *, int, enum ultra_code));
+
+static const char *ultra_code_names[NUM_ULTRA_CODES] = {
+ "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
+ "FPM", "FPA", "SINGLE" };
+
+struct ultrasparc_pipeline_state {
+ /* The insns in this group. */
+ rtx group[4];
+
+ /* The code for each insn. */
+ enum ultra_code codes[4];
+
+ /* Which insns in this group have been committed by the
+ scheduler. This is how we determine how many more
+ can issue this cycle. */
+ char commit[4];
+
+ /* How many insns in this group. */
+ char group_size;
+
+ /* Mask of free slots still in this group. */
+ char free_slot_mask;
+
+ /* The slotter uses the following to determine what other
+ insn types can still make their way into this group. */
+ char contents [NUM_ULTRA_CODES];
+ char num_ieu_insns;
+};
#define ULTRA_NUM_HIST 8
static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
return IEU0;
else if (type_mask & (TMASK (TYPE_COMPARE) |
TMASK (TYPE_CALL) |
+ TMASK (TYPE_SIBCALL) |
TMASK (TYPE_UNCOND_BRANCH)))
return IEU1;
else if (type_mask & (TMASK (TYPE_IALU) | TMASK (TYPE_BINARY) |
TMASK (TYPE_FPSTORE)))
return LSU;
else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
- TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRT)))
+ TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRTS) |
+ TMASK (TYPE_FPSQRTD)))
return FPM;
else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
/* If this got dispatched in the previous
group, the results are not ready. */
- entry = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
+ entry = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
up = &ultra_pipe_hist[entry];
slot = 4;
while (--slot >= 0)
int hist_ent;
int hist_lim;
- hist_ent = (ultra_cur_hist - 1) % (ULTRA_NUM_HIST - 1);
+ hist_ent = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
if (ultra_cycles_elapsed < 4)
hist_lim = ultra_cycles_elapsed;
else
&& GET_CODE (SET_SRC (pat)) != NEG
&& ((TMASK (get_attr_type (insn)) &
(TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
- TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRT) |
+ TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRTS) |
+ TMASK (TYPE_FPSQRTD) |
TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
return 1;
}
hist_lim--;
- hist_ent = (hist_ent - 1) % (ULTRA_NUM_HIST - 1);
+ hist_ent = (hist_ent - 1) & (ULTRA_NUM_HIST - 1);
}
/* No conflicts, safe to dispatch. */
{
int pipe_slot;
char mask = ultra_pipe.free_slot_mask;
+ rtx temp;
/* Obtain free slot. */
for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
ultra_pipe.commit[pipe_slot] = 0;
/* Update ready list. */
- if (ip != &ready[this])
+ temp = *ip;
+ while (ip != &ready[this])
{
- rtx temp = *ip;
-
- *ip = ready[this];
- ready[this] = temp;
+ ip[0] = ip[1];
+ ++ip;
}
+ *ip = temp;
}
/* Advance to the next pipeline group. */
static void
ultra_flush_pipeline ()
{
- ultra_cur_hist = (ultra_cur_hist + 1) % (ULTRA_NUM_HIST - 1);
+ ultra_cur_hist = (ultra_cur_hist + 1) & (ULTRA_NUM_HIST - 1);
ultra_cycles_elapsed += 1;
- bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
+ memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
ultra_pipe.free_slot_mask = 0xf;
}
-static int ultra_reorder_called_this_block;
-
/* Init our data structures for this current block. */
void
ultrasparc_sched_init (dump, sched_verbose)
FILE *dump ATTRIBUTE_UNUSED;
int sched_verbose ATTRIBUTE_UNUSED;
{
- bzero ((char *) ultra_pipe_hist, sizeof ultra_pipe_hist);
+ memset ((char *) ultra_pipe_hist, 0, sizeof ultra_pipe_hist);
ultra_cur_hist = 0;
ultra_cycles_elapsed = 0;
- ultra_reorder_called_this_block = 0;
ultra_pipe.free_slot_mask = 0xf;
}
struct ultrasparc_pipeline_state *up = &ultra_pipe;
int i, this_insn;
- /* We get called once unnecessarily per block of insns
- scheduled. */
- if (ultra_reorder_called_this_block == 0)
- {
- ultra_reorder_called_this_block = 1;
- return;
- }
-
if (sched_verbose)
{
int n;
/* If we are not in the process of emptying out the pipe, try to
obtain an instruction which must be the first in it's group. */
ip = ultra_find_type ((TMASK (TYPE_CALL) |
+ TMASK (TYPE_SIBCALL) |
TMASK (TYPE_CALL_NO_DELAY_SLOT) |
TMASK (TYPE_UNCOND_BRANCH)),
ready, this_insn);
}
else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
TMASK (TYPE_FPDIVD) |
- TMASK (TYPE_FPSQRT)),
+ TMASK (TYPE_FPSQRTS) |
+ TMASK (TYPE_FPSQRTD)),
ready, this_insn)) != 0)
{
ultra_schedule_insn (ip, ready, this_insn, FPM);
}
else
{
- bzero ((char *) &ultra_pipe, sizeof ultra_pipe);
+ memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
ultra_pipe.free_slot_mask = 0xf;
}
}
}
}
-int
-ultrasparc_adjust_cost (insn, link, dep_insn, cost)
- rtx insn;
- rtx link;
- rtx dep_insn;
- int cost;
-{
- enum attr_type insn_type, dep_type;
- rtx pat = PATTERN(insn);
- rtx dep_pat = PATTERN (dep_insn);
-
- if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
- return cost;
-
- insn_type = get_attr_type (insn);
- dep_type = get_attr_type (dep_insn);
-
- /* Nothing issues in parallel with integer multiplies, so
- mark as zero cost since the scheduler can not do anything
- about it. */
- if (insn_type == TYPE_IMUL)
- return 0;
-
-#define SLOW_FP(dep_type) \
-(dep_type == TYPE_FPSQRT || dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
-
- switch (REG_NOTE_KIND (link))
- {
- case 0:
- /* Data dependency; DEP_INSN writes a register that INSN reads some
- cycles later. */
-
- if (dep_type == TYPE_CMOVE)
- {
- /* Instructions that read the result of conditional moves cannot
- be in the same group or the following group. */
- return cost + 1;
- }
-
- switch (insn_type)
- {
- /* UltraSPARC can dual issue a store and an instruction setting
- the value stored, except for divide and square root. */
- case TYPE_FPSTORE:
- if (! SLOW_FP (dep_type))
- return 0;
- return cost;
-
- case TYPE_STORE:
- if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
- return cost;
-
- if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
- /* The dependency between the two instructions is on the data
- that is being stored. Assume that the address of the store
- is not also dependent. */
- return 0;
- return cost;
-
- case TYPE_LOAD:
- case TYPE_SLOAD:
- case TYPE_FPLOAD:
- /* A load does not return data until at least 11 cycles after
- a store to the same location. 3 cycles are accounted for
- in the load latency; add the other 8 here. */
- if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
- {
- /* If the addresses are not equal this may be a false
- dependency because pointer aliasing could not be
- determined. Add only 2 cycles in that case. 2 is
- an arbitrary compromise between 8, which would cause
- the scheduler to generate worse code elsewhere to
- compensate for a dependency which might not really
- exist, and 0. */
- if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
- || GET_CODE (SET_SRC (pat)) != MEM
- || GET_CODE (SET_DEST (dep_pat)) != MEM
- || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
- XEXP (SET_DEST (dep_pat), 0)))
- return cost + 2;
-
- return cost + 8;
- }
- return cost;
-
- case TYPE_BRANCH:
- /* Compare to branch latency is 0. There is no benefit from
- separating compare and branch. */
- if (dep_type == TYPE_COMPARE)
- return 0;
- /* Floating point compare to branch latency is less than
- compare to conditional move. */
- if (dep_type == TYPE_FPCMP)
- return cost - 1;
- return cost;
-
- case TYPE_FPCMOVE:
- /* FMOVR class instructions can not issue in the same cycle
- or the cycle after an instruction which writes any
- integer register. Model this as cost 2 for dependent
- instructions. */
- if ((dep_type == TYPE_IALU || dep_type == TYPE_UNARY
- || dep_type == TYPE_BINARY)
- && cost < 2)
- return 2;
- /* Otherwise check as for integer conditional moves. */
-
- case TYPE_CMOVE:
- /* Conditional moves involving integer registers wait until
- 3 cycles after loads return data. The interlock applies
- to all loads, not just dependent loads, but that is hard
- to model. */
- if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
- return cost + 3;
- return cost;
-
- default:
- break;
- }
- break;
-
- case REG_DEP_ANTI:
- /* Divide and square root lock destination registers for full latency. */
- if (! SLOW_FP (dep_type))
- return 0;
- break;
-
- case REG_DEP_OUTPUT:
- /* IEU and FPU instruction that have the same destination
- register cannot be grouped together. */
- return cost + 1;
-
- default:
- break;
- }
-
- /* Other costs not accounted for:
- - Single precision floating point loads lock the other half of
- the even/odd register pair.
- - Several hazards associated with ldd/std are ignored because these
- instructions are rarely generated for V9.
- - The floating point pipeline can not have both a single and double
- precision operation active at the same time. Format conversions
- and graphics instructions are given honorary double precision status.
- - call and jmpl are always the first instruction in a group. */
-
- return cost;
-}
-
-int
-sparc_issue_rate ()
+int
+sparc_issue_rate ()
{
switch (sparc_cpu)
{
return 2;
case PROCESSOR_SUPERSPARC:
return 3;
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ return 2;
case PROCESSOR_ULTRASPARC:
return 4;
}
}
static int
-set_extends(x, insn)
- rtx x, insn;
+set_extends (insn)
+ rtx insn;
{
register rtx pat = PATTERN (insn);
return 1;
case AND:
{
+ rtx op0 = XEXP (SET_SRC (pat), 0);
rtx op1 = XEXP (SET_SRC (pat), 1);
if (GET_CODE (op1) == CONST_INT)
return INTVAL (op1) >= 0;
- if (GET_CODE (XEXP (SET_SRC (pat), 0)) == REG
- && sparc_check_64 (XEXP (SET_SRC (pat), 0), insn) == 1)
- return 1;
- if (GET_CODE (op1) == REG
- && sparc_check_64 ((op1), insn) == 1)
+ if (GET_CODE (op0) != REG)
+ return 0;
+ if (sparc_check_64 (op0, insn) == 1)
return 1;
+ return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
+ }
+ case IOR:
+ case XOR:
+ {
+ rtx op0 = XEXP (SET_SRC (pat), 0);
+ rtx op1 = XEXP (SET_SRC (pat), 1);
+ if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
+ return 0;
+ if (GET_CODE (op1) == CONST_INT)
+ return INTVAL (op1) >= 0;
+ return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
}
case ASHIFT:
case LSHIFTRT:
return GET_MODE (SET_SRC (pat)) == SImode;
/* Positive integers leave the high bits zero. */
case CONST_DOUBLE:
- return ! (CONST_DOUBLE_LOW (x) & 0x80000000);
+ return ! (CONST_DOUBLE_LOW (SET_SRC (pat)) & 0x80000000);
case CONST_INT:
- return ! (INTVAL (x) & 0x80000000);
+ return ! (INTVAL (SET_SRC (pat)) & 0x80000000);
case ASHIFTRT:
case SIGN_EXTEND:
return - (GET_MODE (SET_SRC (pat)) == SImode);
+ case REG:
+ return sparc_check_64 (SET_SRC (pat), insn);
default:
return 0;
}
the single set and return the correct value or fail to recognize
it and return 0. */
int set_once = 0;
+ rtx y = x;
- if (GET_CODE (x) == REG
- && flag_expensive_optimizations
- && REG_N_SETS (REGNO (x)) == 1)
+ if (GET_CODE (x) != REG)
+ abort ();
+
+ if (GET_MODE (x) == DImode)
+ y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
+
+ if (flag_expensive_optimizations
+ && REG_N_SETS (REGNO (y)) == 1)
set_once = 1;
if (insn == 0)
if (GET_CODE (pat) != SET)
return 0;
if (rtx_equal_p (x, SET_DEST (pat)))
- return set_extends (x, insn);
- if (reg_overlap_mentioned_p (SET_DEST (pat), x))
+ return set_extends (insn);
+ if (y && rtx_equal_p (y, SET_DEST (pat)))
+ return set_extends (insn);
+ if (reg_overlap_mentioned_p (SET_DEST (pat), y))
return 0;
}
}
sparc_v8plus_shift (operands, insn, opcode)
rtx *operands;
rtx insn;
- char *opcode;
+ const char *opcode;
{
static char asm_code[60];
operands[3] = operands[0];
if (GET_CODE (operands[1]) == CONST_INT)
{
- output_asm_insn ("mov %1,%3", operands);
+ output_asm_insn ("mov\t%1, %3", operands);
}
else
{
- output_asm_insn ("sllx %H1,32,%3", operands);
+ output_asm_insn ("sllx\t%H1, 32, %3", operands);
if (sparc_check_64 (operands[1], insn) <= 0)
- output_asm_insn ("srl %L1,0,%L1", operands);
- output_asm_insn ("or %L1,%3,%3", operands);
+ output_asm_insn ("srl\t%L1, 0, %L1", operands);
+ output_asm_insn ("or\t%L1, %3, %3", operands);
}
strcpy(asm_code, opcode);
if (which_alternative != 2)
- return strcat (asm_code, " %0,%2,%L0\n\tsrlx %L0,32,%H0");
+ return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
else
- return strcat (asm_code, " %3,%2,%3\n\tsrlx %3,32,%H0\n\tmov %3,%L0");
+ return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
{
if (! TARGET_V9)
return 0;
- if (leaf_function)
+ if (current_function_uses_only_leaf_regs)
return 0;
if (GET_CODE (src) != CONST_INT
&& (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
return 0;
return IN_OR_GLOBAL_P (dest);
}
+\f
+/* Output assembler code to FILE to increment profiler label # LABELNO
+ for profiling a function entry.
+
+ 32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
+ during profiling so we need to save/restore it around the call to mcount.
+ We're guaranteed that a save has just been done, and we use the space
+ allocated for intreg/fpreg value passing. */
+
+void
+sparc_function_profiler (file, labelno)
+ FILE *file;
+ int labelno;
+{
+ char buf[32];
+ /* BUF receives the local counter label "LP<labelno>"; its address is
+ passed to mcount in %o0.  */
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
+
+ if (! TARGET_ARCH64)
+ fputs ("\tst\t%g2, [%fp-4]\n", file);
+
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, buf);
+ fputs ("), %o0\n", file);
+
+ fputs ("\tcall\t", file);
+ assemble_name (file, MCOUNT_FUNCTION);
+ putc ('\n', file);
+
+ /* "\t or" (with the extra space) is this file's convention for an
+ instruction sitting in the call's delay slot; it completes the
+ %hi/%lo address of BUF in %o0.  */
+ fputs ("\t or\t%o0, %lo(", file);
+ assemble_name (file, buf);
+ fputs ("), %o0\n", file);
+
+ if (! TARGET_ARCH64)
+ fputs ("\tld\t[%fp-4], %g2\n", file);
+}
+
+
+/* The following macro shall output assembler code to FILE
+ to initialize basic-block profiling.
+
+ If profile_block_flag == 2
+
+ Output code to call the subroutine `__bb_init_trace_func'
+ and pass two parameters to it. The first parameter is
+ the address of a block allocated in the object module.
+ The second parameter is the number of the first basic block
+ of the function.
+
+ The name of the block is a local symbol made with this statement:
+
+ ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
+
+ Of course, since you are writing the definition of
+ `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
+ can take a short cut in the definition of this macro and use the
+ name that you know will result.
+
+ The number of the first basic block of the function is
+ passed to the macro in BLOCK_OR_LABEL.
+
+ If described in a virtual assembler language the code to be
+ output looks like:
+
+ parameter1 <- LPBX0
+ parameter2 <- BLOCK_OR_LABEL
+ call __bb_init_trace_func
+
+ else if profile_block_flag != 0
+
+ Output code to call the subroutine `__bb_init_func'
+ and pass one single parameter to it, which is the same
+ as the first parameter to `__bb_init_trace_func'.
+
+ The first word of this parameter is a flag which will be nonzero if
+ the object module has already been initialized. So test this word
+ first, and do not call `__bb_init_func' if the flag is nonzero.
+ Note: When profile_block_flag == 2 the test need not be done
+ but `__bb_init_trace_func' *must* be called.
+
+ BLOCK_OR_LABEL may be used to generate a label number as a
+ branch destination in case `__bb_init_func' will not be called.
+
+ If described in a virtual assembler language the code to be
+ output looks like:
+
+ cmp (LPBX0),0
+ jne local_label
+ parameter1 <- LPBX0
+ call __bb_init_func
+ local_label:
+
+*/
+
+void
+sparc_function_block_profiler(file, block_or_label)
+ FILE *file;
+ int block_or_label;
+{
+ char LPBX[32];
+ /* LPBX0 is the per-module block-profiling data area; see the macro
+ documentation above for the two profile_block_flag modes.  */
+ ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
+
+ if (profile_block_flag == 2)
+ {
+ /* Trace mode: %o0 <- LPBX0, %o1 <- BLOCK_OR_LABEL, then call
+ __bb_init_trace_func unconditionally.  */
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, LPBX);
+ fputs ("), %o0\n", file);
+
+ fprintf (file, "\tsethi\t%%hi(%d), %%o1\n", block_or_label);
+
+ fputs ("\tor\t%o0, %lo(", file);
+ assemble_name (file, LPBX);
+ fputs ("), %o0\n", file);
+
+ fprintf (file, "\tcall\t%s__bb_init_trace_func\n", user_label_prefix);
+
+ /* Delay slot of the call: finish forming %o1.  */
+ fprintf (file, "\t or\t%%o1, %%lo(%d), %%o1\n", block_or_label);
+ }
+ else if (profile_block_flag != 0)
+ {
+ /* Plain mode: skip the call when the first word of LPBX0 (the
+ "already initialized" flag) is nonzero.  */
+ char LPBY[32];
+ ASM_GENERATE_INTERNAL_LABEL (LPBY, "LPBY", block_or_label);
+
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, LPBX);
+ fputs ("), %o0\n", file);
+
+ /* Load the guard word: ld [%lo(LPBX)+%o0], %o1.  */
+ fputs ("\tld\t[%lo(", file);
+ assemble_name (file, LPBX);
+ fputs (")+%o0], %o1\n", file);
+
+ fputs ("\ttst\t%o1\n", file);
+
+ if (TARGET_V9)
+ {
+ /* V9: annotate the branch as predicted-not-taken.  */
+ fputs ("\tbne,pn\t%icc,", file);
+ assemble_name (file, LPBY);
+ putc ('\n', file);
+ }
+ else
+ {
+ fputs ("\tbne\t", file);
+ assemble_name (file, LPBY);
+ putc ('\n', file);
+ }
+
+ /* Delay slot of the branch: harmless either way, it just finishes
+ forming the LPBX0 address (the call's first argument).  */
+ fputs ("\t or\t%o0, %lo(", file);
+ assemble_name (file, LPBX);
+ fputs ("), %o0\n", file);
+
+ fprintf (file, "\tcall\t%s__bb_init_func\n\t nop\n", user_label_prefix);
+
+ /* Branch target for the already-initialized case.  */
+ ASM_OUTPUT_INTERNAL_LABEL (file, "LPBY", block_or_label);
+ }
+}
+
+/* The following macro shall output assembler code to FILE
+ to increment a counter associated with basic block number BLOCKNO.
+
+ If profile_block_flag == 2
+
+ Output code to initialize the global structure `__bb' and
+ call the function `__bb_trace_func' which will increment the
+ counter.
+
+ `__bb' consists of two words. In the first word the number
+ of the basic block has to be stored. In the second word
+ the address of a block allocated in the object module
+ has to be stored.
+
+ The basic block number is given by BLOCKNO.
+
+ The address of the block is given by the label created with
+
+ ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 0);
+
+ by FUNCTION_BLOCK_PROFILER.
+
+ Of course, since you are writing the definition of
+ `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
+ can take a short cut in the definition of this macro and use the
+ name that you know will result.
+
+ If described in a virtual assembler language the code to be
+ output looks like:
+
+ move BLOCKNO -> (__bb)
+ move LPBX0 -> (__bb+4)
+ call __bb_trace_func
+
+ Note that function `__bb_trace_func' must not change the
+ machine state, especially the flag register. To grant
+ this, you must output code to save and restore registers
+ either in this macro or in the macros MACHINE_STATE_SAVE
+ and MACHINE_STATE_RESTORE. The last two macros will be
+ used in the function `__bb_trace_func', so you must make
+ sure that the function prologue does not change any
+ register prior to saving it with MACHINE_STATE_SAVE.
+
+ else if profile_block_flag != 0
+
+ Output code to increment the counter directly.
+ Basic blocks are numbered separately from zero within each
+ compiled object module. The count associated with block number
+ BLOCKNO is at index BLOCKNO in an array of words; the name of
+ this array is a local symbol made with this statement:
+
+ ASM_GENERATE_INTERNAL_LABEL (BUFFER, "LPBX", 2);
+
+ Of course, since you are writing the definition of
+ `ASM_GENERATE_INTERNAL_LABEL' as well as that of this macro, you
+ can take a short cut in the definition of this macro and use the
+ name that you know will result.
+
+ If described in a virtual assembler language, the code to be
+ output looks like:
+
+ inc (LPBX2+4*BLOCKNO)
+
+*/
+
+void
+sparc_block_profiler(file, blockno)
+ FILE *file;
+ int blockno;
+{
+ char LPBX[32];
+ /* Scratch global used to stage values: %g2 on 32 bit, %g4 on 64 bit.
+ NOTE(review): presumably %g2 is avoided on arch64 because it may be
+ reserved as an application register there -- confirm against the ABI.  */
+ int bbreg = TARGET_ARCH64 ? 4 : 2;
+
+ if (profile_block_flag == 2)
+ {
+ /* Trace mode: store BLOCKNO into __bb[0] and the address of LPBX0
+ into __bb[1], then call __bb_trace_func (see macro doc above).  */
+ ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 0);
+
+ fprintf (file, "\tsethi\t%%hi(%s__bb), %%g1\n", user_label_prefix);
+ fprintf (file, "\tsethi\t%%hi(%d), %%g%d\n", blockno, bbreg);
+ fprintf (file, "\tor\t%%g1, %%lo(%s__bb), %%g1\n", user_label_prefix);
+ fprintf (file, "\tor\t%%g%d, %%lo(%d), %%g%d\n", bbreg, blockno, bbreg);
+
+ fprintf (file, "\tst\t%%g%d, [%%g1]\n", bbreg);
+
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, LPBX);
+ fprintf (file, "), %%g%d\n", bbreg);
+
+ /* Complete the LPBX0 address in %g<bbreg>.  The or must source the
+ same register the sethi above targeted; it previously read %o2,
+ which holds unrelated data here and yielded a bogus address.  */
+ fprintf (file, "\tor\t%%g%d, %%lo(", bbreg);
+ assemble_name (file, LPBX);
+ fprintf (file, "), %%g%d\n", bbreg);
+
+ fprintf (file, "\tst\t%%g%d, [%%g1 + 4]\n", bbreg);
+ /* __bb_trace_func must not disturb machine state, but the call
+ itself clobbers %o7; preserve it in the scratch global.  */
+ fprintf (file, "\tmov\t%%o7, %%g%d\n", bbreg);
+
+ fprintf (file, "\tcall\t%s__bb_trace_func\n\t nop\n", user_label_prefix);
+
+ fprintf (file, "\tmov\t%%g%d, %%o7\n", bbreg);
+ }
+ else if (profile_block_flag != 0)
+ {
+ /* Plain mode: increment the counter word at LPBX2 + 4*BLOCKNO.  */
+ ASM_GENERATE_INTERNAL_LABEL (LPBX, "LPBX", 2);
+
+ fputs ("\tsethi\t%hi(", file);
+ assemble_name (file, LPBX);
+ fprintf (file, "+%d), %%g1\n", blockno*4);
+
+ fputs ("\tld\t[%g1+%lo(", file);
+ assemble_name (file, LPBX);
+ /* With an offsetable %lo() the assembler takes "%lo(sym)+off";
+ otherwise fold the offset into the relocation expression.  */
+ if (TARGET_ARCH64 && USE_AS_OFFSETABLE_LO10)
+ fprintf (file, ")+%d], %%g%d\n", blockno*4, bbreg);
+ else
+ fprintf (file, "+%d)], %%g%d\n", blockno*4, bbreg);
+
+ fprintf (file, "\tadd\t%%g%d, 1, %%g%d\n", bbreg, bbreg);
+
+ fprintf (file, "\tst\t%%g%d, [%%g1+%%lo(", bbreg);
+ assemble_name (file, LPBX);
+ if (TARGET_ARCH64 && USE_AS_OFFSETABLE_LO10)
+ fprintf (file, ")+%d]\n", blockno*4);
+ else
+ fprintf (file, "+%d)]\n", blockno*4);
+ }
+}
+
+/* The following macro shall output assembler code to FILE
+ to indicate a return from function during basic-block profiling.
+
+ If profile_block_flag == 2:
+
+ Output assembler code to call function `__bb_trace_ret'.
+
+ Note that function `__bb_trace_ret' must not change the
+ machine state, especially the flag register. To grant
+ this, you must output code to save and restore registers
+ either in this macro or in the macros MACHINE_STATE_SAVE_RET
+ and MACHINE_STATE_RESTORE_RET. The last two macros will be
+ used in the function `__bb_trace_ret', so you must make
+ sure that the function prologue does not change any
+ register prior to saving it with MACHINE_STATE_SAVE_RET.
+
+ else if profile_block_flag != 0:
+
+ The macro will not be used, so it need not distinguish
+ these cases.
+*/
+
+/* Emit the function-exit hook for basic-block tracing: a call to
+ __bb_trace_ret in trace mode.  Per the macro documentation above,
+ this hook is only ever used when profile_block_flag == 2, so any
+ other value indicates a compiler bug -- hence the abort.  */
+void
+sparc_function_block_profiler_exit(file)
+ FILE *file;
+{
+ if (profile_block_flag == 2)
+ fprintf (file, "\tcall\t%s__bb_trace_ret\n\t nop\n", user_label_prefix);
+ else
+ abort ();
+}
+
+/* Mark ARG, which is really a struct ultrasparc_pipeline_state *, for
+ GC: mark every rtx held in its group[] array so the collector does
+ not free insns still referenced by the scheduler's pipeline state.  */
+
+static void
+mark_ultrasparc_pipeline_state (arg)
+ void *arg;
+{
+ struct ultrasparc_pipeline_state *ups;
+ size_t i;
+
+ ups = (struct ultrasparc_pipeline_state *) arg;
+ /* Element count of group[], computed from its total size.  */
+ for (i = 0; i < sizeof (ups->group) / sizeof (rtx); ++i)
+ ggc_mark_rtx (ups->group[i]);
+}
+
+/* Called to register all of our global variables with the garbage
+ collector.  Each rtx root is registered with nelts 1; the pipeline
+ history array gets a custom marker since its elements are structs
+ containing rtxes rather than bare rtx pointers.  */
+
+static void
+sparc_add_gc_roots ()
+{
+ ggc_add_rtx_root (&sparc_compare_op0, 1);
+ ggc_add_rtx_root (&sparc_compare_op1, 1);
+ ggc_add_rtx_root (&leaf_label, 1);
+ ggc_add_rtx_root (&global_offset_table, 1);
+ ggc_add_rtx_root (&get_pc_symbol, 1);
+ ggc_add_rtx_root (&sparc_addr_diff_list, 1);
+ ggc_add_rtx_root (&sparc_addr_list, 1);
+ ggc_add_root (ultra_pipe_hist, ARRAY_SIZE (ultra_pipe_hist),
+ sizeof (ultra_pipe_hist[0]), &mark_ultrasparc_pipeline_state);
+}