/* Subroutines for insn-output.c for Sun SPARC.
Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
- 1999, 2000, 2001 Free Software Foundation, Inc.
+ 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
Contributed by Michael Tiemann (tiemann@cygnus.com)
64 bit SPARC V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
at Cygnus Support.
static int apparent_fsize;
static int actual_fsize;
-/* Number of live general or floating point registers needed to be saved
- (as 4-byte quantities). This is only done if TARGET_EPILOGUE. */
+/* Number of live general or floating point registers that need to be
+   saved (as 4-byte quantities).  */
static int num_gfregs;
/* Save the operands last given to a compare for use when we
generate a scc or bcc insn. */
-
rtx sparc_compare_op0, sparc_compare_op1;
-/* We may need an epilogue if we spill too many registers.
- If this is non-zero, then we branch here for the epilogue. */
-static rtx leaf_label;
-
-#ifdef LEAF_REGISTERS
+/* Coordinate with the md file wrt special insns created by
+ sparc_nonflat_function_epilogue. */
+bool sparc_emitting_epilogue;
/* Vector to say how input registers are mapped to output registers.
HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
eliminate it. You must use -fomit-frame-pointer to get that. */
-const char leaf_reg_remap[] =
+char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
-1, -1, -1, -1, -1, -1, 14, -1,
-1, -1, -1, -1, -1, -1, -1, -1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1};
-#endif
-
/* Name of where we pretend to think the frame pointer points.
Normally, this is "%fp", but if we are in a leaf procedure,
this is "%sp+something". We record "something" separately as it may be
static int supersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int hypersparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
-static int ultrasparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static void sparc_output_addr_vec PARAMS ((rtx));
static void sparc_output_addr_diff_vec PARAMS ((rtx));
static void sparc_output_deferred_case_vectors PARAMS ((void));
static void sparc_add_gc_roots PARAMS ((void));
-static void mark_ultrasparc_pipeline_state PARAMS ((void *));
static int check_return_regs PARAMS ((rtx));
static int epilogue_renumber PARAMS ((rtx *, int));
static bool sparc_assemble_integer PARAMS ((rtx, unsigned int, int));
-static int ultra_cmove_results_ready_p PARAMS ((rtx));
-static int ultra_fpmode_conflict_exists PARAMS ((enum machine_mode));
-static rtx *ultra_find_type PARAMS ((int, rtx *, int));
-static void ultra_build_types_avail PARAMS ((rtx *, int));
-static void ultra_flush_pipeline PARAMS ((void));
-static void ultra_rescan_pipeline_state PARAMS ((rtx *, int));
static int set_extends PARAMS ((rtx));
static void output_restore_regs PARAMS ((FILE *, int));
static void sparc_output_function_prologue PARAMS ((FILE *, HOST_WIDE_INT));
#ifdef OBJECT_FORMAT_ELF
static void sparc_elf_asm_named_section PARAMS ((const char *, unsigned int));
#endif
-static void ultrasparc_sched_reorder PARAMS ((FILE *, int, rtx *, int));
-static int ultrasparc_variable_issue PARAMS ((rtx));
-static void ultrasparc_sched_init PARAMS ((void));
+static void sparc_aout_select_section PARAMS ((tree, int,
+ unsigned HOST_WIDE_INT))
+ ATTRIBUTE_UNUSED;
static int sparc_adjust_cost PARAMS ((rtx, rtx, rtx, int));
static int sparc_issue_rate PARAMS ((void));
-static int sparc_variable_issue PARAMS ((FILE *, int, rtx, int));
static void sparc_sched_init PARAMS ((FILE *, int, int));
-static int sparc_sched_reorder PARAMS ((FILE *, int, rtx *, int *, int));
+static int sparc_use_dfa_pipeline_interface PARAMS ((void));
+static int sparc_use_sched_lookahead PARAMS ((void));
+
+static void emit_soft_tfmode_libcall PARAMS ((const char *, int, rtx *));
+static void emit_soft_tfmode_binop PARAMS ((enum rtx_code, rtx *));
+static void emit_soft_tfmode_unop PARAMS ((enum rtx_code, rtx *));
+static void emit_soft_tfmode_cvt PARAMS ((enum rtx_code, rtx *));
+static void emit_hard_tfmode_operation PARAMS ((enum rtx_code, rtx *));
\f
/* Option handling. */
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
-#undef TARGET_SCHED_VARIABLE_ISSUE
-#define TARGET_SCHED_VARIABLE_ISSUE sparc_variable_issue
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
-#undef TARGET_SCHED_REORDER
-#define TARGET_SCHED_REORDER sparc_sched_reorder
+#undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
+#define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE sparc_use_dfa_pipeline_interface
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
struct gcc_target targetm = TARGET_INITIALIZER;
\f
{ TARGET_CPU_supersparc, "supersparc" },
{ TARGET_CPU_v9, "v9" },
{ TARGET_CPU_ultrasparc, "ultrasparc" },
+ { TARGET_CPU_ultrasparc3, "ultrasparc3" },
{ 0, 0 }
};
const struct cpu_default *def;
/* Although insns using %y are deprecated, it is a clear win on current
ultrasparcs. */
|MASK_DEPRECATED_V8_INSNS},
+ /* TI ultrasparc III */
+ /* ??? Check if %y issue still holds true in ultra3. */
+ { "ultrasparc3", PROCESSOR_ULTRASPARC3, MASK_ISA, MASK_V9|MASK_DEPRECATED_V8_INSNS},
{ 0, 0, 0, 0 }
};
const struct cpu_table *cpu;
target_flags &= ~MASK_STACK_BIAS;
/* Supply a default value for align_functions. */
- if (align_functions == 0 && sparc_cpu == PROCESSOR_ULTRASPARC)
+ if (align_functions == 0
+ && (sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_ULTRASPARC3))
align_functions = 32;
/* Validate PCC_STRUCT_RETURN. */
/* Do various machine dependent initializations. */
sparc_init_modes ();
- if ((profile_flag)
- && sparc_cmodel != CM_32 && sparc_cmodel != CM_MEDLOW)
- {
- error ("profiling does not support code models other than medlow");
- }
-
/* Register global variables with the garbage collector. */
sparc_add_gc_roots ();
}
return 0;
}
+/* Return non-zero only if OP is const1_rtx. */
+
+int
+const1_operand (op, mode)
+ rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ return op == const1_rtx;
+}
+
/* Nonzero if OP is a floating point value with value 0.0. */
int
return op == CONST0_RTX (mode);
}
+/* Nonzero if OP is a register operand in a floating point register.  */
+
+int
+fp_register_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ if (! register_operand (op, mode))
+ return 0;
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+ return GET_CODE (op) == REG && SPARC_FP_REG_P (REGNO (op));
+}
+
/* Nonzero if OP is a floating point constant which can
be loaded into an integer register using a single
sethi instruction. */
#endif
}
+/* Nonzero if OP is a floating point condition code fcc0 register. */
+
+int
+fcc0_reg_operand (op, mode)
+ rtx op;
+ enum machine_mode mode;
+{
+ /* This can happen when recog is called from combine. Op may be a MEM.
+ Fail instead of calling abort in this case. */
+ if (GET_CODE (op) != REG)
+ return 0;
+
+ if (mode != VOIDmode && mode != GET_MODE (op))
+ return 0;
+ if (mode == VOIDmode
+ && (GET_MODE (op) != CCFPmode && GET_MODE (op) != CCFPEmode))
+ return 0;
+
+ return REGNO (op) == SPARC_FCC_REG;
+}
+
/* Nonzero if OP is an integer or floating point condition code register. */
int
if (GET_RTX_CLASS (code) != '<')
return 0;
- if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode)
+ if (GET_MODE (XEXP (op, 0)) == CC_NOOVmode
+ || GET_MODE (XEXP (op, 0)) == CCX_NOOVmode)
/* These are the only branches which work with CC_NOOVmode. */
return (code == EQ || code == NE || code == GE || code == LT);
return 1;
}
+/* Return 1 if this is a 64-bit comparison operator. This allows the use of
+ MATCH_OPERATOR to recognize all the branch insns. */
+
+int
+noov_compare64_op (op, mode)
+ register rtx op;
+ enum machine_mode mode ATTRIBUTE_UNUSED;
+{
+ enum rtx_code code = GET_CODE (op);
+
+ if (! TARGET_V9)
+ return 0;
+
+ if (GET_RTX_CLASS (code) != '<')
+ return 0;
+
+ if (GET_MODE (XEXP (op, 0)) == CCX_NOOVmode)
+ /* These are the only branches which work with CCX_NOOVmode. */
+ return (code == EQ || code == NE || code == GE || code == LT);
+ return (GET_MODE (XEXP (op, 0)) == CCXmode);
+}
+
/* Nonzero if OP is a comparison operator suitable for use in v9
conditional move or branch on register contents instructions. */
rtx op;
enum machine_mode mode;
{
- int val;
if (register_operand (op, mode))
return 1;
if (GET_CODE (op) != CONST_INT)
return 0;
- val = INTVAL (op) & 0xffffffff;
- return SPARC_SIMM13_P (val);
+ return SMALL_INT32 (op);
}
/* Return true if OP is a constant 4096 */
rtx op;
enum machine_mode mode ATTRIBUTE_UNUSED;
{
- int val;
if (GET_CODE (op) != CONST_INT)
return 0;
- val = INTVAL (op) & 0xffffffff;
- return val == 4096;
+ else
+ return INTVAL (op) == 4096;
}
/* Return true if OP is suitable as second operand for add/sub */
&& SPARC_SIMM13_P (CONST_DOUBLE_LOW (op))
&& (CONST_DOUBLE_HIGH (op) ==
((CONST_DOUBLE_LOW (op) & 0x80000000) != 0 ?
- (HOST_WIDE_INT)0xffffffff : 0)))
+ (HOST_WIDE_INT)-1 : 0)))
#endif
);
}
int
const64_high_operand (op, mode)
rtx op;
- enum machine_mode mode ATTRIBUTE_UNUSED;
+ enum machine_mode mode;
{
return ((GET_CODE (op) == CONST_INT
- && (INTVAL (op) & 0xfffffc00) != 0
- && SPARC_SETHI_P (INTVAL (op))
-#if HOST_BITS_PER_WIDE_INT != 64
- /* Must be positive on non-64bit host else the
- optimizer is fooled into thinking that sethi
- sign extends, even though it does not. */
- && INTVAL (op) >= 0
-#endif
+ && (INTVAL (op) & ~(HOST_WIDE_INT)0x3ff) != 0
+ && SPARC_SETHI_P (INTVAL (op) & GET_MODE_MASK (mode))
)
|| (GET_CODE (op) == CONST_DOUBLE
&& CONST_DOUBLE_HIGH (op) == 0
- && (CONST_DOUBLE_LOW (op) & 0xfffffc00) != 0
+ && (CONST_DOUBLE_LOW (op) & ~(HOST_WIDE_INT)0x3ff) != 0
&& SPARC_SETHI_P (CONST_DOUBLE_LOW (op))));
}
variants when we are working in DImode and !arch64. */
if (GET_MODE_CLASS (mode) == MODE_INT
&& ((GET_CODE (op) == CONST_INT
- && ((SPARC_SETHI_P (INTVAL (op))
- && (! TARGET_ARCH64
- || (INTVAL (op) >= 0)
- || mode == SImode
- || mode == HImode
- || mode == QImode))
+ && (SPARC_SETHI_P (INTVAL (op) & GET_MODE_MASK (mode))
|| SPARC_SIMM13_P (INTVAL (op))
|| (mode == DImode
&& ! TARGET_ARCH64)))
{
HOST_WIDE_INT value = INTVAL (op1);
- if (SPARC_SETHI_P (value)
+ if (SPARC_SETHI_P (value & GET_MODE_MASK (mode))
|| SPARC_SIMM13_P (value))
abort ();
}
&& (INTVAL (op1) & 0x80000000) != 0)
emit_insn (gen_rtx_SET
(VOIDmode, temp,
- gen_rtx_CONST_DOUBLE (VOIDmode, INTVAL (op1) & 0xfffffc00,
- 0)));
+ immed_double_const (INTVAL (op1) & ~(HOST_WIDE_INT)0x3ff,
+ 0, DImode)));
else
emit_insn (gen_rtx_SET (VOIDmode, temp,
- GEN_INT (INTVAL (op1) & 0xfffffc00)));
+ GEN_INT (INTVAL (op1)
+ & ~(HOST_WIDE_INT)0x3ff)));
emit_insn (gen_rtx_SET (VOIDmode,
op0,
static rtx gen_safe_XOR64 PARAMS ((rtx, HOST_WIDE_INT));
#if HOST_BITS_PER_WIDE_INT == 64
-#define GEN_HIGHINT64(__x) GEN_INT ((__x) & 0xfffffc00)
+#define GEN_HIGHINT64(__x) GEN_INT ((__x) & ~(HOST_WIDE_INT)0x3ff)
#define GEN_INT64(__x) GEN_INT (__x)
#else
#define GEN_HIGHINT64(__x) \
- gen_rtx_CONST_DOUBLE (VOIDmode, (__x) & 0xfffffc00, 0)
+ immed_double_const ((__x) & ~(HOST_WIDE_INT)0x3ff, 0, DImode)
#define GEN_INT64(__x) \
- gen_rtx_CONST_DOUBLE (VOIDmode, (__x) & 0xffffffff, \
- ((__x) & 0x80000000 \
- ? 0xffffffff : 0))
+ immed_double_const ((__x) & 0xffffffff, \
+ ((__x) & 0x80000000 ? -1 : 0), DImode)
#endif
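+
+/* (sethi loads a 22-bit immediate into bits 31..10 of its destination
+   register, which is why GEN_HIGHINT64 masks off the low 10 bits with
+   ~(HOST_WIDE_INT)0x3ff.)  */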
/* The optimizer is not to assume anything about exactly
{
emit_insn (gen_rtx_SET (VOIDmode, op0,
gen_safe_XOR64 (temp,
- (-0x400 | (low_bits & 0x3ff)))));
+ (-(HOST_WIDE_INT)0x400
+ | (low_bits & 0x3ff)))));
}
}
}
negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
(((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
#else
- negated_const = gen_rtx_CONST_DOUBLE (DImode,
- (~low_bits) & 0xfffffc00,
- (~high_bits) & 0xffffffff);
+ negated_const = immed_double_const ((~low_bits) & 0xfffffc00,
+ (~high_bits) & 0xffffffff,
+ DImode);
#endif
sparc_emit_set_const64 (temp, negated_const);
}
|| GET_MODE (operands[0]) == DImode))
return 0;
- /* Handle the case where operands[0] == sparc_compare_op0.
- We "early clobber" the result. */
- if (REGNO (operands[0]) == REGNO (sparc_compare_op0))
- {
- op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
- emit_move_insn (op0, sparc_compare_op0);
- }
- else
- op0 = sparc_compare_op0;
- /* For consistency in the following. */
+ op0 = sparc_compare_op0;
op1 = sparc_compare_op1;
/* Try to use the movrCC insns. */
&& v9_regcmp_p (compare_code))
{
/* Special case for op0 != 0. This can be done with one instruction if
- operands[0] == sparc_compare_op0. We don't assume they are equal
- now though. */
+ operands[0] == sparc_compare_op0. */
if (compare_code == NE
&& GET_MODE (operands[0]) == DImode
- && GET_MODE (op0) == DImode)
+ && rtx_equal_p (op0, operands[0]))
{
- emit_insn (gen_rtx_SET (VOIDmode, operands[0], op0));
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_IF_THEN_ELSE (DImode,
gen_rtx_fmt_ee (compare_code, DImode,
return 1;
}
+ if (reg_overlap_mentioned_p (operands[0], op0))
+ {
+ /* Handle the case where operands[0] == sparc_compare_op0.
+ We "early clobber" the result. */
+ op0 = gen_reg_rtx (GET_MODE (sparc_compare_op0));
+ emit_move_insn (op0, sparc_compare_op0);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, operands[0], const0_rtx));
if (GET_MODE (op0) != DImode)
{
return gen_rtx_REG (DFmode, regno);
}
\f
-/* Return nonzero if a return peephole merging return with
- setting of output register is ok. */
-int
-leaf_return_peephole_ok ()
-{
- return (actual_fsize == 0);
-}
-
-/* Return nonzero if TRIAL can go into the function epilogue's
- delay slot. SLOT is the slot we are trying to fill. */
+/* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
+ Unlike normal calls, TFmode operands are passed by reference. It is
+ assumed that no more than 3 operands are required. */
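+/* (E.g. a TFmode add ends up as roughly "_Qp_add (&result, &x, &y)",
+   following the SPARC V9 quad soft-float ABI, with the TFmode values
+   passed by reference.)  */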
-int
-eligible_for_epilogue_delay (trial, slot)
- rtx trial;
- int slot;
+static void
+emit_soft_tfmode_libcall (func_name, nargs, operands)
+ const char *func_name;
+ int nargs;
+ rtx *operands;
{
- rtx pat, src;
-
- if (slot >= 1)
- return 0;
+ rtx ret_slot = NULL, arg[3], func_sym;
+ int i;
- if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
- return 0;
+ /* We only expect to be called for conversions, unary, and binary ops. */
+ if (nargs < 2 || nargs > 3)
+ abort ();
- if (get_attr_length (trial) != 1)
- return 0;
+ for (i = 0; i < nargs; ++i)
+ {
+ rtx this_arg = operands[i];
+ rtx this_slot;
- /* If there are any call-saved registers, we should scan TRIAL if it
- does not reference them. For now just make it easy. */
- if (num_gfregs)
- return 0;
+ /* TFmode arguments and return values are passed by reference. */
+ if (GET_MODE (this_arg) == TFmode)
+ {
+ int force_stack_temp;
- /* If the function uses __builtin_eh_return, the eh_return machinery
- occupies the delay slot. */
- if (current_function_calls_eh_return)
- return 0;
+ force_stack_temp = 0;
+ if (TARGET_BUGGY_QP_LIB && i == 0)
+ force_stack_temp = 1;
- /* In the case of a true leaf function, anything can go into the delay slot.
- A delay slot only exists however if the frame size is zero, otherwise
- we will put an insn to adjust the stack after the return. */
- if (current_function_uses_only_leaf_regs)
- {
- if (leaf_return_peephole_ok ())
- return ((get_attr_in_uncond_branch_delay (trial)
- == IN_BRANCH_DELAY_TRUE));
- return 0;
- }
+ if (GET_CODE (this_arg) == MEM
+ && ! force_stack_temp)
+ this_arg = XEXP (this_arg, 0);
+ else if (CONSTANT_P (this_arg)
+ && ! force_stack_temp)
+ {
+ this_slot = force_const_mem (TFmode, this_arg);
+ this_arg = XEXP (this_slot, 0);
+ }
+ else
+ {
+ this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode), 0);
- pat = PATTERN (trial);
+ /* Operand 0 is the return value. We'll copy it out later. */
+ if (i > 0)
+ emit_move_insn (this_slot, this_arg);
+ else
+ ret_slot = this_slot;
- /* Otherwise, only operations which can be done in tandem with
- a `restore' or `return' insn can go into the delay slot. */
- if (GET_CODE (SET_DEST (pat)) != REG
- || REGNO (SET_DEST (pat)) < 24)
- return 0;
+ this_arg = XEXP (this_slot, 0);
+ }
+ }
- /* If this instruction sets up floating point register and we have a return
- instruction, it can probably go in. But restore will not work
- with FP_REGS. */
- if (REGNO (SET_DEST (pat)) >= 32)
- {
- if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
- && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
- return 1;
- return 0;
+ arg[i] = this_arg;
}
- /* The set of insns matched here must agree precisely with the set of
- patterns paired with a RETURN in sparc.md. */
-
- src = SET_SRC (pat);
+ func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
- /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64. */
- if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
- && arith_operand (src, GET_MODE (src)))
+ if (GET_MODE (operands[0]) == TFmode)
{
- if (TARGET_ARCH64)
- return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ if (nargs == 2)
+ emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 2,
+ arg[0], GET_MODE (arg[0]),
+ arg[1], GET_MODE (arg[1]));
else
- return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
- }
+ emit_library_call (func_sym, LCT_NORMAL, VOIDmode, 3,
+ arg[0], GET_MODE (arg[0]),
+ arg[1], GET_MODE (arg[1]),
+ arg[2], GET_MODE (arg[2]));
- /* This matches "*return_di". */
- else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
- && arith_double_operand (src, GET_MODE (src)))
- return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ if (ret_slot)
+ emit_move_insn (operands[0], ret_slot);
+ }
+ else
+ {
+ rtx ret;
- /* This matches "*return_sf_no_fpu". */
- else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
- && register_operand (src, SFmode))
- return 1;
+ if (nargs != 2)
+ abort ();
- /* If we have return instruction, anything that does not use
- local or output registers and can go into a delay slot wins. */
- else if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
- && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
- return 1;
+ ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
+ GET_MODE (operands[0]), 1,
+ arg[1], GET_MODE (arg[1]));
- /* This matches "*return_addsi". */
- else if (GET_CODE (src) == PLUS
- && arith_operand (XEXP (src, 0), SImode)
- && arith_operand (XEXP (src, 1), SImode)
- && (register_operand (XEXP (src, 0), SImode)
- || register_operand (XEXP (src, 1), SImode)))
- return 1;
+ if (ret != operands[0])
+ emit_move_insn (operands[0], ret);
+ }
+}
- /* This matches "*return_adddi". */
- else if (GET_CODE (src) == PLUS
- && arith_double_operand (XEXP (src, 0), DImode)
- && arith_double_operand (XEXP (src, 1), DImode)
- && (register_operand (XEXP (src, 0), DImode)
- || register_operand (XEXP (src, 1), DImode)))
- return 1;
+/* Expand soft-float TFmode operations into calls to the SPARC ABI routines. */
- /* This can match "*return_losum_[sd]i".
- Catch only some cases, so that return_losum* don't have
- to be too big. */
- else if (GET_CODE (src) == LO_SUM
- && ! TARGET_CM_MEDMID
- && ((register_operand (XEXP (src, 0), SImode)
- && immediate_operand (XEXP (src, 1), SImode))
- || (TARGET_ARCH64
- && register_operand (XEXP (src, 0), DImode)
- && immediate_operand (XEXP (src, 1), DImode))))
- return 1;
+static void
+emit_soft_tfmode_binop (code, operands)
+ enum rtx_code code;
+ rtx *operands;
+{
+ const char *func;
- /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well. */
- else if (GET_CODE (src) == ASHIFT
- && (register_operand (XEXP (src, 0), SImode)
- || register_operand (XEXP (src, 0), DImode))
- && XEXP (src, 1) == const1_rtx)
- return 1;
+ switch (code)
+ {
+ case PLUS:
+ func = "_Qp_add";
+ break;
+ case MINUS:
+ func = "_Qp_sub";
+ break;
+ case MULT:
+ func = "_Qp_mul";
+ break;
+ case DIV:
+ func = "_Qp_div";
+ break;
+ default:
+ abort ();
+ }
- return 0;
+ emit_soft_tfmode_libcall (func, 3, operands);
}
-/* Return nonzero if TRIAL can go into the sibling call
- delay slot. */
-
-int
-eligible_for_sibcall_delay (trial)
- rtx trial;
+static void
+emit_soft_tfmode_unop (code, operands)
+ enum rtx_code code;
+ rtx *operands;
{
- rtx pat, src;
+ const char *func;
- if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
- return 0;
+ switch (code)
+ {
+ case SQRT:
+ func = "_Qp_sqrt";
+ break;
+ default:
+ abort ();
+ }
- if (get_attr_length (trial) != 1)
- return 0;
+ emit_soft_tfmode_libcall (func, 2, operands);
+}
- pat = PATTERN (trial);
+static void
+emit_soft_tfmode_cvt (code, operands)
+ enum rtx_code code;
+ rtx *operands;
+{
+ const char *func;
- if (current_function_uses_only_leaf_regs)
+ switch (code)
{
- /* If the tail call is done using the call instruction,
- we have to restore %o7 in the delay slot. */
- if ((TARGET_ARCH64 && ! TARGET_CM_MEDLOW) || flag_pic)
- return 0;
+ case FLOAT_EXTEND:
+ switch (GET_MODE (operands[1]))
+ {
+ case SFmode:
+ func = "_Qp_stoq";
+ break;
+ case DFmode:
+ func = "_Qp_dtoq";
+ break;
+ default:
+ abort ();
+ }
+ break;
- /* %g1 is used to build the function address */
- if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
- return 0;
+ case FLOAT_TRUNCATE:
+ switch (GET_MODE (operands[0]))
+ {
+ case SFmode:
+ func = "_Qp_qtos";
+ break;
+ case DFmode:
+ func = "_Qp_qtod";
+ break;
+ default:
+ abort ();
+ }
+ break;
- return 1;
+ case FLOAT:
+ switch (GET_MODE (operands[1]))
+ {
+ case SImode:
+ func = "_Qp_itoq";
+ break;
+ case DImode:
+ func = "_Qp_xtoq";
+ break;
+ default:
+ abort ();
+ }
+ break;
+
+ case UNSIGNED_FLOAT:
+ switch (GET_MODE (operands[1]))
+ {
+ case SImode:
+ func = "_Qp_uitoq";
+ break;
+ case DImode:
+ func = "_Qp_uxtoq";
+ break;
+ default:
+ abort ();
+ }
+ break;
+
+ case FIX:
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ func = "_Qp_qtoi";
+ break;
+ case DImode:
+ func = "_Qp_qtox";
+ break;
+ default:
+ abort ();
+ }
+ break;
+
+ case UNSIGNED_FIX:
+ switch (GET_MODE (operands[0]))
+ {
+ case SImode:
+ func = "_Qp_qtoui";
+ break;
+ case DImode:
+ func = "_Qp_qtoux";
+ break;
+ default:
+ abort ();
+ }
+ break;
+
+ default:
+ abort ();
+ }
+
+ emit_soft_tfmode_libcall (func, 2, operands);
+}
+
+/* Expand a hard-float TFmode operation.  All arguments must be in
+   registers.  */
+
+static void
+emit_hard_tfmode_operation (code, operands)
+ enum rtx_code code;
+ rtx *operands;
+{
+ rtx op, dest;
+
+ if (GET_RTX_CLASS (code) == '1')
+ {
+ operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+ op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
+ }
+ else
+ {
+ operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
+ operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
+ op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
+ operands[1], operands[2]);
+ }
+
+ if (register_operand (operands[0], VOIDmode))
+ dest = operands[0];
+ else
+ dest = gen_reg_rtx (GET_MODE (operands[0]));
+
+ emit_insn (gen_rtx_SET (VOIDmode, dest, op));
+
+ if (dest != operands[0])
+ emit_move_insn (operands[0], dest);
+}
+
+void
+emit_tfmode_binop (code, operands)
+ enum rtx_code code;
+ rtx *operands;
+{
+ if (TARGET_HARD_QUAD)
+ emit_hard_tfmode_operation (code, operands);
+ else
+ emit_soft_tfmode_binop (code, operands);
+}
+
+void
+emit_tfmode_unop (code, operands)
+ enum rtx_code code;
+ rtx *operands;
+{
+ if (TARGET_HARD_QUAD)
+ emit_hard_tfmode_operation (code, operands);
+ else
+ emit_soft_tfmode_unop (code, operands);
+}
+
+void
+emit_tfmode_cvt (code, operands)
+ enum rtx_code code;
+ rtx *operands;
+{
+ if (TARGET_HARD_QUAD)
+ emit_hard_tfmode_operation (code, operands);
+ else
+ emit_soft_tfmode_cvt (code, operands);
+}
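+
+/* (These three are the entry points intended for use by the sparc.md
+   expanders; e.g. an addtf3 expander would call
+   emit_tfmode_binop (PLUS, operands).  Illustrative; the md patterns
+   themselves are not part of this file.)  */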
+\f
+/* Return nonzero if a return peephole merging return with
+ setting of output register is ok. */
+int
+leaf_return_peephole_ok ()
+{
+ return (actual_fsize == 0);
+}
+
+/* Return nonzero if a branch/jump/call instruction will emit a nop
+   into its delay slot.  */
+
+int
+empty_delay_slot (insn)
+ rtx insn;
+{
+ rtx seq;
+
+  /* If there is no previous instruction (should not happen), return true.  */
+ if (PREV_INSN (insn) == NULL)
+ return 1;
+
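+  /* If INSN's delay slot was filled, reorg wrapped it in a SEQUENCE;
+     NEXT_INSN (PREV_INSN (insn)) then yields the containing SEQUENCE
+     insn rather than INSN itself.  */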
+ seq = NEXT_INSN (PREV_INSN (insn));
+ if (GET_CODE (PATTERN (seq)) == SEQUENCE)
+ return 0;
+
+ return 1;
+}
+
+/* Return nonzero if TRIAL can go into the function epilogue's
+ delay slot. SLOT is the slot we are trying to fill. */
+
+int
+eligible_for_epilogue_delay (trial, slot)
+ rtx trial;
+ int slot;
+{
+ rtx pat, src;
+
+ if (slot >= 1)
+ return 0;
+
+ if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
+ return 0;
+
+ if (get_attr_length (trial) != 1)
+ return 0;
+
+  /* If there are any call-saved registers, we would need to scan TRIAL
+     to verify that it does not reference any of them.  For now just
+     make it easy.  */
+ if (num_gfregs)
+ return 0;
+
+ /* If the function uses __builtin_eh_return, the eh_return machinery
+ occupies the delay slot. */
+ if (current_function_calls_eh_return)
+ return 0;
+
+  /* In the case of a true leaf function, anything can go into the delay slot.
+     A delay slot exists, however, only if the frame size is zero; otherwise
+     we will put an insn to adjust the stack after the return.  */
+ if (current_function_uses_only_leaf_regs)
+ {
+ if (leaf_return_peephole_ok ())
+ return ((get_attr_in_uncond_branch_delay (trial)
+ == IN_BRANCH_DELAY_TRUE));
+ return 0;
}
+ pat = PATTERN (trial);
+
/* Otherwise, only operations which can be done in tandem with
- a `restore' insn can go into the delay slot. */
+ a `restore' or `return' insn can go into the delay slot. */
if (GET_CODE (SET_DEST (pat)) != REG
- || REGNO (SET_DEST (pat)) < 24
- || REGNO (SET_DEST (pat)) >= 32)
+ || REGNO (SET_DEST (pat)) < 24)
return 0;
- /* If it mentions %o7, it can't go in, because sibcall will clobber it
- in most cases. */
- if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
- return 0;
+  /* If this instruction sets up a floating point register and we have a
+     return instruction, it can probably go in.  But restore will not work
+     with FP_REGS.  */
+ if (REGNO (SET_DEST (pat)) >= 32)
+ {
+ if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
+ && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
+ return 1;
+ return 0;
+ }
+
+ /* The set of insns matched here must agree precisely with the set of
+ patterns paired with a RETURN in sparc.md. */
src = SET_SRC (pat);
+ /* This matches "*return_[qhs]i" or even "*return_di" on TARGET_ARCH64. */
if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
&& arith_operand (src, GET_MODE (src)))
{
return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
}
+ /* This matches "*return_di". */
else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
&& arith_double_operand (src, GET_MODE (src)))
return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ /* This matches "*return_sf_no_fpu". */
else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
&& register_operand (src, SFmode))
return 1;
+  /* If we have a return instruction, anything that does not use
+     local or output registers and can go into a delay slot wins.  */
+ else if (TARGET_V9 && ! epilogue_renumber (&pat, 1)
+ && (get_attr_in_uncond_branch_delay (trial) == IN_BRANCH_DELAY_TRUE))
+ return 1;
+
+ /* This matches "*return_addsi". */
else if (GET_CODE (src) == PLUS
&& arith_operand (XEXP (src, 0), SImode)
&& arith_operand (XEXP (src, 1), SImode)
|| register_operand (XEXP (src, 1), SImode)))
return 1;
+ /* This matches "*return_adddi". */
else if (GET_CODE (src) == PLUS
&& arith_double_operand (XEXP (src, 0), DImode)
&& arith_double_operand (XEXP (src, 1), DImode)
|| register_operand (XEXP (src, 1), DImode)))
return 1;
+ /* This can match "*return_losum_[sd]i".
+ Catch only some cases, so that return_losum* don't have
+ to be too big. */
else if (GET_CODE (src) == LO_SUM
&& ! TARGET_CM_MEDMID
&& ((register_operand (XEXP (src, 0), SImode)
&& immediate_operand (XEXP (src, 1), DImode))))
return 1;
+ /* sll{,x} reg,1,reg2 is add reg,reg,reg2 as well. */
else if (GET_CODE (src) == ASHIFT
&& (register_operand (XEXP (src, 0), SImode)
|| register_operand (XEXP (src, 0), DImode))
return 0;
}
-static int
-check_return_regs (x)
- rtx x;
-{
- switch (GET_CODE (x))
- {
- case REG:
- return IN_OR_GLOBAL_P (x);
-
- case CONST_INT:
- case CONST_DOUBLE:
- case CONST:
- case SYMBOL_REF:
- case LABEL_REF:
- return 1;
-
- case SET:
- case IOR:
- case AND:
- case XOR:
- case PLUS:
- case MINUS:
- if (check_return_regs (XEXP (x, 1)) == 0)
- return 0;
- case NOT:
- case NEG:
- case MEM:
- return check_return_regs (XEXP (x, 0));
-
- default:
- return 0;
- }
-
-}
+/* Return nonzero if TRIAL can go into the sibling call
+ delay slot. */
-/* Return 1 if TRIAL references only in and global registers. */
int
-eligible_for_return_delay (trial)
+eligible_for_sibcall_delay (trial)
rtx trial;
{
- if (GET_CODE (PATTERN (trial)) != SET)
+ rtx pat, src;
+
+ if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
return 0;
- return check_return_regs (PATTERN (trial));
-}
+ if (get_attr_length (trial) != 1)
+ return 0;
-int
-short_branch (uid1, uid2)
- int uid1, uid2;
-{
- int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
+ pat = PATTERN (trial);
- /* Leave a few words of "slop". */
- if (delta >= -1023 && delta <= 1022)
- return 1;
+ if (current_function_uses_only_leaf_regs)
+ {
+ /* If the tail call is done using the call instruction,
+ we have to restore %o7 in the delay slot. */
+ if ((TARGET_ARCH64 && ! TARGET_CM_MEDLOW) || flag_pic)
+ return 0;
- return 0;
-}
+      /* %g1 is used to build the function address.  */
+ if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
+ return 0;
-/* Return non-zero if REG is not used after INSN.
- We assume REG is a reload reg, and therefore does
- not live past labels or calls or jumps. */
-int
+ return 1;
+ }
+
+ /* Otherwise, only operations which can be done in tandem with
+ a `restore' insn can go into the delay slot. */
+ if (GET_CODE (SET_DEST (pat)) != REG
+ || REGNO (SET_DEST (pat)) < 24
+ || REGNO (SET_DEST (pat)) >= 32)
+ return 0;
+
+ /* If it mentions %o7, it can't go in, because sibcall will clobber it
+ in most cases. */
+ if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
+ return 0;
+
+ src = SET_SRC (pat);
+
+ if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
+ && arith_operand (src, GET_MODE (src)))
+ {
+ if (TARGET_ARCH64)
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+ else
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
+ }
+
+ else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
+ && arith_double_operand (src, GET_MODE (src)))
+ return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
+
+ else if (! TARGET_FPU && restore_operand (SET_DEST (pat), SFmode)
+ && register_operand (src, SFmode))
+ return 1;
+
+ else if (GET_CODE (src) == PLUS
+ && arith_operand (XEXP (src, 0), SImode)
+ && arith_operand (XEXP (src, 1), SImode)
+ && (register_operand (XEXP (src, 0), SImode)
+ || register_operand (XEXP (src, 1), SImode)))
+ return 1;
+
+ else if (GET_CODE (src) == PLUS
+ && arith_double_operand (XEXP (src, 0), DImode)
+ && arith_double_operand (XEXP (src, 1), DImode)
+ && (register_operand (XEXP (src, 0), DImode)
+ || register_operand (XEXP (src, 1), DImode)))
+ return 1;
+
+ else if (GET_CODE (src) == LO_SUM
+ && ! TARGET_CM_MEDMID
+ && ((register_operand (XEXP (src, 0), SImode)
+ && immediate_operand (XEXP (src, 1), SImode))
+ || (TARGET_ARCH64
+ && register_operand (XEXP (src, 0), DImode)
+ && immediate_operand (XEXP (src, 1), DImode))))
+ return 1;
+
+ else if (GET_CODE (src) == ASHIFT
+ && (register_operand (XEXP (src, 0), SImode)
+ || register_operand (XEXP (src, 0), DImode))
+ && XEXP (src, 1) == const1_rtx)
+ return 1;
+
+ return 0;
+}
+
+static int
+check_return_regs (x)
+ rtx x;
+{
+ switch (GET_CODE (x))
+ {
+ case REG:
+ return IN_OR_GLOBAL_P (x);
+
+ case CONST_INT:
+ case CONST_DOUBLE:
+ case CONST:
+ case SYMBOL_REF:
+ case LABEL_REF:
+ return 1;
+
+ case SET:
+ case IOR:
+ case AND:
+ case XOR:
+ case PLUS:
+ case MINUS:
+ if (check_return_regs (XEXP (x, 1)) == 0)
+ return 0;
+ case NOT:
+ case NEG:
+ case MEM:
+ return check_return_regs (XEXP (x, 0));
+
+ default:
+ return 0;
+ }
+
+}
+
+/* Return 1 if TRIAL references only in and global registers. */
+int
+eligible_for_return_delay (trial)
+ rtx trial;
+{
+ if (GET_CODE (PATTERN (trial)) != SET)
+ return 0;
+
+ return check_return_regs (PATTERN (trial));
+}
+
+int
+short_branch (uid1, uid2)
+ int uid1, uid2;
+{
+ int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
+
+ /* Leave a few words of "slop". */
+ if (delta >= -1023 && delta <= 1022)
+ return 1;
+
+ return 0;
+}
+
+/* Return non-zero if REG is not used after INSN.
+ We assume REG is a reload reg, and therefore does
+ not live past labels or calls or jumps. */
+int
reg_unused_after (reg, insn)
rtx reg;
rtx insn;
}
\f
/* Return 1 if RTX is a MEM which is known to be aligned to at
- least an 8 byte boundary. */
+ least a DESIRED byte boundary. */
int
mem_min_alignment (mem, desired)
int outgoing_args_size = (current_function_outgoing_args_size
+ REG_PARM_STACK_SPACE (current_function_decl));
- if (TARGET_EPILOGUE)
- {
- /* N_REGS is the number of 4-byte regs saved thus far. This applies
- even to v9 int regs to be consistent with save_regs/restore_regs. */
-
- if (TARGET_ARCH64)
- {
- for (i = 0; i < 8; i++)
- if (regs_ever_live[i] && ! call_used_regs[i])
- n_regs += 2;
- }
- else
- {
- for (i = 0; i < 8; i += 2)
- if ((regs_ever_live[i] && ! call_used_regs[i])
- || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
- n_regs += 2;
- }
+ /* N_REGS is the number of 4-byte regs saved thus far. This applies
+ even to v9 int regs to be consistent with save_regs/restore_regs. */
- for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
+ if (TARGET_ARCH64)
+ {
+ for (i = 0; i < 8; i++)
+ if (regs_ever_live[i] && ! call_used_regs[i])
+ n_regs += 2;
+ }
+ else
+ {
+ for (i = 0; i < 8; i += 2)
if ((regs_ever_live[i] && ! call_used_regs[i])
|| (regs_ever_live[i+1] && ! call_used_regs[i+1]))
n_regs += 2;
}
+ for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
+ if ((regs_ever_live[i] && ! call_used_regs[i])
+ || (regs_ever_live[i+1] && ! call_used_regs[i+1]))
+ n_regs += 2;
+
/* Set up values for use in `function_epilogue'. */
num_gfregs = n_regs;
base = frame_base_name;
}
- n_regs = 0;
- if (TARGET_EPILOGUE && ! leaf_function)
- /* ??? Originally saved regs 0-15 here. */
- n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
- else if (leaf_function)
- /* ??? Originally saved regs 0-31 here. */
- n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
- if (TARGET_EPILOGUE)
- save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
- real_offset);
- }
-
- leaf_label = 0;
- if (leaf_function && actual_fsize != 0)
- {
- /* warning ("leaf procedure with frame size %d", actual_fsize); */
- if (! TARGET_EPILOGUE)
- leaf_label = gen_label_rtx ();
+ n_regs = save_regs (file, 0, 8, base, offset, 0, real_offset);
+ save_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs,
+ real_offset);
}
}
static void
output_restore_regs (file, leaf_function)
FILE *file;
- int leaf_function;
+ int leaf_function ATTRIBUTE_UNUSED;
{
int offset, n_regs;
const char *base;
base = frame_base_name;
}
- n_regs = 0;
- if (TARGET_EPILOGUE && ! leaf_function)
- /* ??? Originally saved regs 0-15 here. */
- n_regs = restore_regs (file, 0, 8, base, offset, 0);
- else if (leaf_function)
- /* ??? Originally saved regs 0-31 here. */
- n_regs = restore_regs (file, 0, 8, base, offset, 0);
- if (TARGET_EPILOGUE)
- restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
+ n_regs = restore_regs (file, 0, 8, base, offset, 0);
+ restore_regs (file, 32, TARGET_V9 ? 96 : 64, base, offset, n_regs);
}
/* This function generates the assembly code for function exit,
{
const char *ret;
- if (leaf_label)
- {
- emit_label_after (leaf_label, get_last_insn ());
- final_scan_insn (get_last_insn (), file, 0, 0, 1);
- }
-
if (current_function_epilogue_delay_list == 0)
{
/* If code does not drop into the epilogue, we need
- do nothing except output pending case vectors. */
- rtx insn = get_last_insn ();
- if (GET_CODE (insn) == NOTE)
- insn = prev_nonnote_insn (insn);
- if (insn && GET_CODE (insn) == BARRIER)
- goto output_vectors;
+ do nothing except output pending case vectors.
+
+      We still have to output a dummy nop for the sake of
+      sane backtraces.  Otherwise, if the last two instructions
+      of a function were "call foo; dslot", the return PC of foo
+      (i.e. the address of the call instruction plus 8) could point
+      to the first instruction in the next function.  */
+ rtx insn;
+
+ fputs("\tnop\n", file);
+
+ insn = get_last_insn ();
+ if (GET_CODE (insn) == NOTE)
+ insn = prev_nonnote_insn (insn);
+ if (insn && GET_CODE (insn) == BARRIER)
+ goto output_vectors;
}
if (num_gfregs)
else
ret = (SKIP_CALLERS_UNIMP_P ? "jmp\t%i7+12" : "ret");
- if (TARGET_EPILOGUE || leaf_label)
+ if (! leaf_function)
{
- int old_target_epilogue = TARGET_EPILOGUE;
- target_flags &= ~old_target_epilogue;
+ if (current_function_calls_eh_return)
+ {
+ if (current_function_epilogue_delay_list)
+ abort ();
+ if (SKIP_CALLERS_UNIMP_P)
+ abort ();
- if (! leaf_function)
+ fputs ("\trestore\n\tretl\n\tadd\t%sp, %g1, %sp\n", file);
+ }
+ /* If we wound up with things in our delay slot, flush them here. */
+ else if (current_function_epilogue_delay_list)
{
- if (current_function_calls_eh_return)
- {
- if (current_function_epilogue_delay_list)
- abort ();
- if (SKIP_CALLERS_UNIMP_P)
- abort ();
+ rtx delay = PATTERN (XEXP (current_function_epilogue_delay_list, 0));
- fputs ("\trestore\n\tretl\n\tadd\t%sp, %g1, %sp\n", file);
+ if (TARGET_V9 && ! epilogue_renumber (&delay, 1))
+ {
+ epilogue_renumber (&delay, 0);
+ fputs (SKIP_CALLERS_UNIMP_P
+ ? "\treturn\t%i7+12\n"
+ : "\treturn\t%i7+8\n", file);
+ final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
+ file, 1, 0, 0);
}
- /* If we wound up with things in our delay slot, flush them here. */
- else if (current_function_epilogue_delay_list)
+ else
{
- rtx delay = PATTERN (XEXP (current_function_epilogue_delay_list, 0));
+ rtx insn, src;
- if (TARGET_V9 && ! epilogue_renumber (&delay, 1))
- {
- epilogue_renumber (&delay, 0);
- fputs (SKIP_CALLERS_UNIMP_P
- ? "\treturn\t%i7+12\n"
- : "\treturn\t%i7+8\n", file);
- final_scan_insn (XEXP (current_function_epilogue_delay_list, 0), file, 1, 0, 0);
- }
- else
- {
- rtx insn = emit_jump_insn_after (gen_rtx_RETURN (VOIDmode),
- get_last_insn ());
- rtx src;
+ if (GET_CODE (delay) != SET)
+ abort();
- if (GET_CODE (delay) != SET)
+ src = SET_SRC (delay);
+ if (GET_CODE (src) == ASHIFT)
+ {
+ if (XEXP (src, 1) != const1_rtx)
abort();
+ SET_SRC (delay)
+ = gen_rtx_PLUS (GET_MODE (src), XEXP (src, 0),
+ XEXP (src, 0));
+ }
- src = SET_SRC (delay);
- if (GET_CODE (src) == ASHIFT)
- {
- if (XEXP (src, 1) != const1_rtx)
- abort();
- SET_SRC (delay) = gen_rtx_PLUS (GET_MODE (src), XEXP (src, 0),
- XEXP (src, 0));
- }
+ insn = gen_rtx_PARALLEL (VOIDmode,
+ gen_rtvec (2, delay,
+ gen_rtx_RETURN (VOIDmode)));
+ insn = emit_jump_insn (insn);
- PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode,
- gen_rtvec (2, delay, PATTERN (insn)));
- final_scan_insn (insn, file, 1, 0, 1);
- }
+ sparc_emitting_epilogue = true;
+ final_scan_insn (insn, file, 1, 0, 1);
+ sparc_emitting_epilogue = false;
}
- else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
- fputs ("\treturn\t%i7+8\n\tnop\n", file);
- else
- fprintf (file, "\t%s\n\trestore\n", ret);
}
- else if (current_function_calls_eh_return)
+ else if (TARGET_V9 && ! SKIP_CALLERS_UNIMP_P)
+ fputs ("\treturn\t%i7+8\n\tnop\n", file);
+ else
+ fprintf (file, "\t%s\n\trestore\n", ret);
+ }
+ /* All of the following cases are for leaf functions. */
+ else if (current_function_calls_eh_return)
+ abort ();
+ else if (current_function_epilogue_delay_list)
+ {
+      /* eligible_for_epilogue_delay ensures that if this is a
+	 leaf function, then we will only have an insn in the delay slot
+	 if the frame size is zero, thus no stack adjustment is
+	 needed here.  */
+ if (actual_fsize != 0)
abort ();
- /* All of the following cases are for leaf functions. */
- else if (current_function_epilogue_delay_list)
- {
- /* eligible_for_epilogue_delay_slot ensures that if this is a
- leaf function, then we will only have insn in the delay slot
- if the frame size is zero, thus no adjust for the stack is
- needed here. */
- if (actual_fsize != 0)
- abort ();
- fprintf (file, "\t%s\n", ret);
- final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
- file, 1, 0, 1);
- }
- /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
- avoid generating confusing assembly language output. */
- else if (actual_fsize == 0)
- fprintf (file, "\t%s\n\tnop\n", ret);
- else if (actual_fsize <= 4096)
- fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
- else if (actual_fsize <= 8192)
- fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
- ret, actual_fsize - 4096);
- else if ((actual_fsize & 0x3ff) == 0)
- fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
- actual_fsize, ret);
- else
- fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
- actual_fsize, actual_fsize, ret);
- target_flags |= old_target_epilogue;
+ fprintf (file, "\t%s\n", ret);
+ final_scan_insn (XEXP (current_function_epilogue_delay_list, 0),
+ file, 1, 0, 1);
}
+ /* Output 'nop' instead of 'sub %sp,-0,%sp' when no frame, so as to
+ avoid generating confusing assembly language output. */
+ else if (actual_fsize == 0)
+ fprintf (file, "\t%s\n\tnop\n", ret);
+ else if (actual_fsize <= 4096)
+ fprintf (file, "\t%s\n\tsub\t%%sp, -%d, %%sp\n", ret, actual_fsize);
+ else if (actual_fsize <= 8192)
+ fprintf (file, "\tsub\t%%sp, -4096, %%sp\n\t%s\n\tsub\t%%sp, -%d, %%sp\n",
+ ret, actual_fsize - 4096);
+ else if ((actual_fsize & 0x3ff) == 0)
+ fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
+ actual_fsize, ret);
+ else
+ fprintf (file, "\tsethi\t%%hi(%d), %%g1\n\tor\t%%g1, %%lo(%d), %%g1\n\t%s\n\tadd\t%%sp, %%g1, %%sp\n",
+ actual_fsize, actual_fsize, ret);
output_vectors:
sparc_output_deferred_case_vectors ();
if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
function_arg_record_value_1 (TREE_TYPE (field), bitpos, parms);
- else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
+ else if ((TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
+ || (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE
+ && (TREE_CODE (TREE_TYPE (TREE_TYPE (field)))
+ == REAL_TYPE)))
&& TARGET_FPU
&& ! packed_p
&& parms->named)
/* There's no need to check this_slotno < SPARC_FP_ARG_MAX.
If it wasn't true we wouldn't be here. */
parms->nregs += 1;
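+	  /* A complex field needs a second register for its imaginary
+	     part.  */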
+ if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
+ parms->nregs += 1;
}
else
{
if (TREE_CODE (TREE_TYPE (field)) == RECORD_TYPE)
function_arg_record_value_2 (TREE_TYPE (field), bitpos, parms);
- else if (TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
+ else if ((TREE_CODE (TREE_TYPE (field)) == REAL_TYPE
+ || (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE
+ && (TREE_CODE (TREE_TYPE (TREE_TYPE (field)))
+ == REAL_TYPE)))
&& TARGET_FPU
&& ! packed_p
&& parms->named)
{
int this_slotno = parms->slotno + bitpos / BITS_PER_WORD;
+ int regno;
+ enum machine_mode mode = DECL_MODE (field);
rtx reg;
function_arg_record_value_3 (bitpos, parms);
-
- reg = gen_rtx_REG (DECL_MODE (field),
- (SPARC_FP_ARG_FIRST + this_slotno * 2
- + (DECL_MODE (field) == SFmode
- && (bitpos & 32) != 0)));
+ regno = SPARC_FP_ARG_FIRST + this_slotno * 2
+ + ((mode == SFmode || mode == SCmode)
+ && (bitpos & 32) != 0);
+ switch (mode)
+ {
+ case SCmode: mode = SFmode; break;
+ case DCmode: mode = DFmode; break;
+ case TCmode: mode = TFmode; break;
+ default: break;
+ }
+ reg = gen_rtx_REG (mode, regno);
XVECEXP (parms->ret, 0, parms->nregs)
= gen_rtx_EXPR_LIST (VOIDmode, reg,
GEN_INT (bitpos / BITS_PER_UNIT));
parms->nregs += 1;
+ if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
+ {
+ regno += GET_MODE_SIZE (mode) / 4;
+ reg = gen_rtx_REG (mode, regno);
+ XVECEXP (parms->ret, 0, parms->nregs)
+ = gen_rtx_EXPR_LIST (VOIDmode, reg,
+ GEN_INT ((bitpos + GET_MODE_BITSIZE (mode))
+ / BITS_PER_UNIT));
+ parms->nregs += 1;
+ }
}
else
{
return ((type && TREE_CODE (type) == ARRAY_TYPE)
/* Consider complex values as aggregates, so care for TCmode. */
|| GET_MODE_SIZE (mode) > 16
- || (type && AGGREGATE_TYPE_P (type)
- && int_size_in_bytes (type) > 16));
+ || (type
+ && AGGREGATE_TYPE_P (type)
+ && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16));
}
}
if (AGGREGATE_TYPE_P (type))
{
- if (size > 16)
+ if ((unsigned HOST_WIDE_INT) size > 16)
{
indirect = 1;
size = rsize = UNITS_PER_WORD;
}
+      /* The SPARC v9 ABI states that structures up to 8 bytes in size
+	 are given one 8-byte slot.  */
+ else if (size == 0)
+ size = rsize = UNITS_PER_WORD;
else
size = rsize;
}
INSN, if set, is the insn. */
char *
-output_cbranch (op, label, reversed, annul, noop, insn)
- rtx op;
+output_cbranch (op, dest, label, reversed, annul, noop, insn)
+ rtx op, dest;
int label;
int reversed, annul, noop;
rtx insn;
{
- static char string[32];
+ static char string[50];
enum rtx_code code = GET_CODE (op);
rtx cc_reg = XEXP (op, 0);
enum machine_mode mode = GET_MODE (cc_reg);
- static char v8_labelno[] = "%lX";
- static char v9_icc_labelno[] = "%%icc, %lX";
- static char v9_xcc_labelno[] = "%%xcc, %lX";
- static char v9_fcc_labelno[] = "%%fccX, %lY";
- char *labelno;
- const char *branch;
- int labeloff, spaces = 8;
+ const char *labelno, *branch;
+ int spaces = 8, far;
+ char *p;
+
+ /* v9 branches are limited to +-1MB. If it is too far away,
+ change
- if (reversed)
+ bne,pt %xcc, .LC30
+
+ to
+
+ be,pn %xcc, .+12
+ nop
+ ba .LC30
+
+ and
+
+ fbne,a,pn %fcc2, .LC29
+
+ to
+
+ fbe,pt %fcc2, .+16
+ nop
+ ba .LC29 */
+
+ far = get_attr_length (insn) >= 3;
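+  /* (The length here is in 4-byte instruction words, per the md length
+     attribute; 3 or more means the short branch form cannot reach the
+     target.)  */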
+ if (reversed ^ far)
{
/* Reversal of FP compares takes care -- an ordered compare
becomes an unordered compare and vice versa. */
branch = "be";
break;
case GE:
- if (mode == CC_NOOVmode)
+ if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
branch = "bpos";
else
branch = "bge";
branch = "ble";
break;
case LT:
- if (mode == CC_NOOVmode)
+ if (mode == CC_NOOVmode || mode == CCX_NOOVmode)
branch = "bneg";
else
branch = "bl";
strcpy (string, branch);
}
spaces -= strlen (branch);
+ p = strchr (string, '\0');
/* Now add the annulling, the label, and a possible noop. */
- if (annul)
+ if (annul && ! far)
{
- strcat (string, ",a");
+ strcpy (p, ",a");
+ p += 2;
spaces -= 2;
}
if (! TARGET_V9)
- {
- labeloff = 2;
- labelno = v8_labelno;
- }
+ labelno = "";
else
{
rtx note;
+ int v8 = 0;
- if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
+ if (! far && insn && INSN_ADDRESSES_SET_P ())
{
- strcat (string,
- INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
- spaces -= 3;
+ int delta = (INSN_ADDRESSES (INSN_UID (dest))
+ - INSN_ADDRESSES (INSN_UID (insn)));
+ /* Leave some instructions for "slop". */
+ if (delta < -260000 || delta >= 260000)
+ v8 = 1;
}
- labeloff = 9;
if (mode == CCFPmode || mode == CCFPEmode)
{
- labeloff = 10;
- labelno = v9_fcc_labelno;
+ static char v9_fcc_labelno[] = "%%fccX, ";
/* Set the char indicating the number of the fcc reg to use. */
- labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
+ v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
+ labelno = v9_fcc_labelno;
+ if (v8)
+ {
+ if (REGNO (cc_reg) == SPARC_FCC_REG)
+ labelno = "";
+ else
+ abort ();
+ }
}
else if (mode == CCXmode || mode == CCX_NOOVmode)
- labelno = v9_xcc_labelno;
+ {
+ labelno = "%%xcc, ";
+ if (v8)
+ abort ();
+ }
else
- labelno = v9_icc_labelno;
+ {
+ labelno = "%%icc, ";
+ if (v8)
+ labelno = "";
+ }
+
+ if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
+ {
+ strcpy (p,
+ ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
+ ? ",pt" : ",pn");
+ p += 3;
+ spaces -= 3;
+ }
}
- /* Set the char indicating the number of the operand containing the
- label_ref. */
- labelno[labeloff] = label + '0';
if (spaces > 0)
- strcat (string, "\t");
+ *p++ = '\t';
else
- strcat (string, " ");
- strcat (string, labelno);
-
+ *p++ = ' ';
+ strcpy (p, labelno);
+ p = strchr (p, '\0');
+ if (far)
+ {
+ strcpy (p, ".+12\n\tnop\n\tb\t");
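+      /* An annulled slot or trailing nop makes the inverted-branch
+	 sequence one insn longer, so skip 16 bytes instead of 12.  */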
+ if (annul || noop)
+ p[3] = '6';
+ p += 13;
+ }
+ *p++ = '%';
+ *p++ = 'l';
+ /* Set the char indicating the number of the operand containing the
+ label_ref. */
+ *p++ = label + '0';
+ *p = '\0';
if (noop)
- strcat (string, "\n\tnop");
+ strcpy (p, "\n\tnop");
return string;
}
else
slot1 = y;
- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), 1,
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
DImode, 2,
XEXP (slot0, 0), Pmode,
XEXP (slot1, 0), Pmode);
}
else
{
- emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), 1,
+ emit_library_call (gen_rtx_SYMBOL_REF (Pmode, qpfunc), LCT_NORMAL,
SImode, 2,
x, TFmode, y, TFmode);
}
}
+/* Generate an unsigned DImode to FP conversion. This is the same code
+ optabs would emit if we didn't have TFmode patterns. */
+
+void
+sparc_emit_floatunsdi (operands)
+ rtx operands[2];
+{
+ rtx neglab, donelab, i0, i1, f0, in, out;
+ enum machine_mode mode;
+
+ out = operands[0];
+ in = force_reg (DImode, operands[1]);
+ mode = GET_MODE (out);
+ neglab = gen_label_rtx ();
+ donelab = gen_label_rtx ();
+ i0 = gen_reg_rtx (DImode);
+ i1 = gen_reg_rtx (DImode);
+ f0 = gen_reg_rtx (mode);
+
+ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
+
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
+ emit_jump_insn (gen_jump (donelab));
+ emit_barrier ();
+
+ emit_label (neglab);
+
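+  /* The input has its high bit set: halve it, folding the lost low bit
+     back in as a sticky bit to preserve rounding, convert the now
+     nonnegative value, then double the result.  */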
+ emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
+ emit_insn (gen_anddi3 (i1, in, const1_rtx));
+ emit_insn (gen_iordi3 (i0, i0, i1));
+ emit_insn (gen_rtx_SET (VOIDmode, f0, gen_rtx_FLOAT (mode, i0)));
+ emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
+
+ emit_label (donelab);
+}
+
/* Return the string to output a conditional branch to LABEL, testing
register REG. LABEL is the operand number of the label; REG is the
operand number of the reg. OP is the conditional expression. The mode
NOOP is non-zero if we have to follow this branch by a noop. */
char *
-output_v9branch (op, reg, label, reversed, annul, noop, insn)
- rtx op;
+output_v9branch (op, dest, reg, label, reversed, annul, noop, insn)
+ rtx op, dest;
int reg, label;
int reversed, annul, noop;
rtx insn;
{
- static char string[20];
+ static char string[50];
enum rtx_code code = GET_CODE (op);
enum machine_mode mode = GET_MODE (XEXP (op, 0));
- static char labelno[] = "%X, %lX";
rtx note;
- int spaces = 8;
+ int far;
+ char *p;
+
+  /* Branch-on-register insns are limited to +-128KB.  If a branch is
+     too far away, change
+
+ brnz,pt %g1, .LC30
+
+ to
+
+ brz,pn %g1, .+12
+ nop
+ ba,pt %xcc, .LC30
+
+ and
+
+ brgez,a,pn %o1, .LC29
+
+ to
+
+ brlz,pt %o1, .+16
+ nop
+ ba,pt %xcc, .LC29 */
+
+ far = get_attr_length (insn) >= 3;
/* If not floating-point or if EQ or NE, we can just reverse the code. */
- if (reversed)
- code = reverse_condition (code), reversed = 0;
+ if (reversed ^ far)
+ code = reverse_condition (code);
/* Only 64 bit versions of these instructions exist. */
if (mode != DImode)
{
case NE:
strcpy (string, "brnz");
- spaces -= 4;
break;
case EQ:
strcpy (string, "brz");
- spaces -= 3;
break;
case GE:
strcpy (string, "brgez");
- spaces -= 5;
break;
case LT:
strcpy (string, "brlz");
- spaces -= 4;
break;
case LE:
strcpy (string, "brlez");
- spaces -= 5;
break;
case GT:
strcpy (string, "brgz");
- spaces -= 4;
break;
default:
abort ();
}
+ p = strchr (string, '\0');
+
/* Now add the annulling, reg, label, and nop. */
- if (annul)
+ if (annul && ! far)
{
- strcat (string, ",a");
- spaces -= 2;
+ strcpy (p, ",a");
+ p += 2;
}
- if (insn && (note = find_reg_note (insn, REG_BR_PRED, NULL_RTX)))
+ if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
{
- strcat (string,
- INTVAL (XEXP (note, 0)) & ATTR_FLAG_likely ? ",pt" : ",pn");
- spaces -= 3;
+ strcpy (p,
+ ((INTVAL (XEXP (note, 0)) >= REG_BR_PROB_BASE / 2) ^ far)
+ ? ",pt" : ",pn");
+ p += 3;
}
- labelno[1] = reg + '0';
- labelno[6] = label + '0';
- if (spaces > 0)
- strcat (string, "\t");
- else
- strcat (string, " ");
- strcat (string, labelno);
-
- if (noop)
- strcat (string, "\n\tnop");
+ *p = p < string + 8 ? '\t' : ' ';
+ p++;
+ *p++ = '%';
+ *p++ = '0' + reg;
+ *p++ = ',';
+ *p++ = ' ';
+ if (far)
+ {
+ int veryfar = 1, delta;
- return string;
-}
+ if (INSN_ADDRESSES_SET_P ())
+ {
+ delta = (INSN_ADDRESSES (INSN_UID (dest))
+ - INSN_ADDRESSES (INSN_UID (insn)));
+ /* Leave some instructions for "slop". */
+ if (delta >= -260000 && delta < 260000)
+ veryfar = 0;
+ }
-/* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
- Such instructions cannot be used in the delay slot of return insn on v9.
+ strcpy (p, ".+12\n\tnop\n\t");
+ if (annul || noop)
+ p[3] = '6';
+ p += 11;
+ if (veryfar)
+ {
+ strcpy (p, "b\t");
+ p += 2;
+ }
+ else
+ {
+ strcpy (p, "ba,pt\t%%xcc, ");
+ p += 13;
+ }
+ }
+ *p++ = '%';
+ *p++ = 'l';
+ *p++ = '0' + label;
+ *p = '\0';
+
+ if (noop)
+ strcpy (p, "\n\tnop");
+
+ return string;
+}
+
+/* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
+   Such instructions cannot be used in the delay slot of a return insn on v9.
If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
*/
}
return 0;
}
-
-/* Output assembler code to return from a function. */
-
-const char *
-output_return (operands)
- rtx *operands;
-{
- rtx delay = final_sequence ? XVECEXP (final_sequence, 0, 1) : 0;
-
- if (leaf_label)
- {
- operands[0] = leaf_label;
- return "b%* %l0%(";
- }
- else if (current_function_uses_only_leaf_regs)
- {
- /* No delay slot in a leaf function. */
- if (delay)
- abort ();
-
- /* If we didn't allocate a frame pointer for the current function,
- the stack pointer might have been adjusted. Output code to
- restore it now. */
-
- operands[0] = GEN_INT (actual_fsize);
-
- /* Use sub of negated value in first two cases instead of add to
- allow actual_fsize == 4096. */
-
- if (actual_fsize <= 4096)
- {
- if (SKIP_CALLERS_UNIMP_P)
- return "jmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
- else
- return "retl\n\tsub\t%%sp, -%0, %%sp";
- }
- else if (actual_fsize <= 8192)
- {
- operands[0] = GEN_INT (actual_fsize - 4096);
- if (SKIP_CALLERS_UNIMP_P)
- return "sub\t%%sp, -4096, %%sp\n\tjmp\t%%o7+12\n\tsub\t%%sp, -%0, %%sp";
- else
- return "sub\t%%sp, -4096, %%sp\n\tretl\n\tsub\t%%sp, -%0, %%sp";
- }
- else if (SKIP_CALLERS_UNIMP_P)
- {
- if ((actual_fsize & 0x3ff) != 0)
- return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
- else
- return "sethi\t%%hi(%a0), %%g1\n\tjmp\t%%o7+12\n\tadd\t%%sp, %%g1, %%sp";
- }
- else
- {
- if ((actual_fsize & 0x3ff) != 0)
- return "sethi\t%%hi(%a0), %%g1\n\tor\t%%g1, %%lo(%a0), %%g1\n\tretl\n\tadd\t%%sp, %%g1, %%sp";
- else
- return "sethi\t%%hi(%a0), %%g1\n\tretl\n\tadd\t%%sp, %%g1, %%sp";
- }
- }
- else if (TARGET_V9)
- {
- if (delay)
- {
- epilogue_renumber (&SET_DEST (PATTERN (delay)), 0);
- epilogue_renumber (&SET_SRC (PATTERN (delay)), 0);
- }
- if (SKIP_CALLERS_UNIMP_P)
- return "return\t%%i7+12%#";
- else
- return "return\t%%i7+8%#";
- }
- else
- {
- if (delay)
- abort ();
- if (SKIP_CALLERS_UNIMP_P)
- return "jmp\t%%i7+12\n\trestore";
- else
- return "ret\n\trestore";
- }
-}
\f
/* Leaf functions and non-leaf functions have different needs. */
ld [%o0 + 4], %o1
to
ldd [%o0], %o0
+ nor:
+ ld [%g3 + 4], %g3
+ ld [%g3], %g2
+ to
+ ldd [%g3], %g2
+
+   But note that the transformation from:
+ ld [%g2 + 4], %g3
+ ld [%g2], %g2
+ to
+ ldd [%g2], %g2
+ is perfectly fine. Thus, the peephole2 patterns always pass us
+ the destination register of the first load, never the second one.
+
For stores we don't have a similar problem, so dependent_reg_rtx is
NULL_RTX. */
case 'b':
{
/* Print a sign-extended character. */
- int i = INTVAL (x) & 0xff;
- if (i & 0x80)
- i |= 0xffffff00;
+ int i = trunc_int_for_mode (INTVAL (x), QImode);
fprintf (file, "%d", i);
return;
}
what kind of result this function returns. For non-C types, we pick
the closest C type. */
-#ifndef CHAR_TYPE_SIZE
-#define CHAR_TYPE_SIZE BITS_PER_UNIT
-#endif
-
#ifndef SHORT_TYPE_SIZE
#define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
#endif
*/
#ifdef TRANSFER_FROM_TRAMPOLINE
emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
- 0, VOIDmode, 1, tramp, Pmode);
+ LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
- expand_binop (SImode, ior_optab,
- expand_shift (RSHIFT_EXPR, SImode, fnaddr,
- size_int (10), 0, 1),
- GEN_INT (0x03000000),
- NULL_RTX, 1, OPTAB_DIRECT));
-
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
- expand_binop (SImode, ior_optab,
- expand_shift (RSHIFT_EXPR, SImode, cxt,
- size_int (10), 0, 1),
- GEN_INT (0x05000000),
- NULL_RTX, 1, OPTAB_DIRECT));
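+  /* Opcode words such as 0x81c06000 have bit 31 set; feed them through
+     trunc_int_for_mode so that GEN_INT gets a canonically sign-extended
+     SImode constant on hosts where HOST_WIDE_INT is wider than 32 bits.  */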
+ emit_move_insn
+ (gen_rtx_MEM (SImode, plus_constant (tramp, 0)),
+ expand_binop (SImode, ior_optab,
+ expand_shift (RSHIFT_EXPR, SImode, fnaddr,
+ size_int (10), 0, 1),
+ GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn
+ (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
+ expand_binop (SImode, ior_optab,
+ expand_shift (RSHIFT_EXPR, SImode, cxt,
+ size_int (10), 0, 1),
+ GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn
+ (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
+ expand_binop (SImode, ior_optab,
+ expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
+ GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT));
+
+ emit_move_insn
+ (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
+ expand_binop (SImode, ior_optab,
+ expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
+ GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
+ NULL_RTX, 1, OPTAB_DIRECT));
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
- expand_binop (SImode, ior_optab,
- expand_and (fnaddr, GEN_INT (0x3ff), NULL_RTX),
- GEN_INT (0x81c06000),
- NULL_RTX, 1, OPTAB_DIRECT));
-
- emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
- expand_binop (SImode, ior_optab,
- expand_and (cxt, GEN_INT (0x3ff), NULL_RTX),
- GEN_INT (0x8410a000),
- NULL_RTX, 1, OPTAB_DIRECT));
-
- emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
/* On UltraSPARC a flush flushes an entire cache line. The trampoline is
aligned on a 16 byte boundary so one flush clears it all. */
- if (sparc_cpu != PROCESSOR_ULTRASPARC)
+ emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode, tramp))));
+ if (sparc_cpu != PROCESSOR_ULTRASPARC
+ && sparc_cpu != PROCESSOR_ULTRASPARC3)
emit_insn (gen_flush (validize_mem (gen_rtx_MEM (SImode,
plus_constant (tramp, 8)))));
}
{
#ifdef TRANSFER_FROM_TRAMPOLINE
emit_library_call (gen_rtx (SYMBOL_REF, Pmode, "__enable_execute_stack"),
- 0, VOIDmode, 1, tramp, Pmode);
+ LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
#endif
  /*
	rd	%pc, %g1
	ldx	[%g1+24], %g5
	jmp	%g5
	ldx	[%g1+16], %g5
   */
emit_move_insn (gen_rtx_MEM (SImode, tramp),
- GEN_INT (0x83414000));
+ GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
- GEN_INT (0xca586018));
+ GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
- GEN_INT (0x81c14000));
+ GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
- GEN_INT (0xca586010));
+ GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), cxt);
emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 24)), fnaddr);
emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, tramp))));
- if (sparc_cpu != PROCESSOR_ULTRASPARC)
+ if (sparc_cpu != PROCESSOR_ULTRASPARC
+ && sparc_cpu != PROCESSOR_ULTRASPARC3)
emit_insn (gen_flushdi (validize_mem (gen_rtx_MEM (DImode, plus_constant (tramp, 8)))));
}
\f
case TYPE_LOAD:
case TYPE_SLOAD:
case TYPE_FPLOAD:
- /* If a load, then the dependence must be on the memory address. If
- the addresses aren't equal, then it might be a false dependency */
- if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
- {
- if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
- || GET_CODE (SET_DEST (dep_pat)) != MEM
- || GET_CODE (SET_SRC (pat)) != MEM
- || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
- XEXP (SET_SRC (pat), 0)))
- return cost + 2;
-
- return cost + 8;
- }
- break;
-
- case TYPE_BRANCH:
- /* Compare to branch latency is 0. There is no benefit from
- separating compare and branch. */
- if (dep_type == TYPE_COMPARE)
- return 0;
- /* Floating point compare to branch latency is less than
- compare to conditional move. */
- if (dep_type == TYPE_FPCMP)
- return cost - 1;
- break;
- default:
- break;
- }
- break;
-
- case REG_DEP_ANTI:
- /* Anti-dependencies only penalize the fpu unit. */
- if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
- return 0;
- break;
-
- default:
- break;
- }
-
- return cost;
-}
-
-static int
-ultrasparc_adjust_cost (insn, link, dep_insn, cost)
- rtx insn;
- rtx link;
- rtx dep_insn;
- int cost;
-{
- enum attr_type insn_type, dep_type;
- rtx pat = PATTERN(insn);
- rtx dep_pat = PATTERN (dep_insn);
-
- if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
- return cost;
-
- insn_type = get_attr_type (insn);
- dep_type = get_attr_type (dep_insn);
-
- /* Nothing issues in parallel with integer multiplies, so
- mark as zero cost since the scheduler can not do anything
- about it. */
- if (insn_type == TYPE_IMUL || insn_type == TYPE_IDIV)
- return 0;
-
-#define SLOW_FP(dep_type) \
-(dep_type == TYPE_FPSQRTS || dep_type == TYPE_FPSQRTD || \
- dep_type == TYPE_FPDIVS || dep_type == TYPE_FPDIVD)
-
- switch (REG_NOTE_KIND (link))
- {
- case 0:
- /* Data dependency; DEP_INSN writes a register that INSN reads some
- cycles later. */
-
- if (dep_type == TYPE_CMOVE)
- {
- /* Instructions that read the result of conditional moves cannot
- be in the same group or the following group. */
- return cost + 1;
- }
-
- switch (insn_type)
- {
- /* UltraSPARC can dual issue a store and an instruction setting
- the value stored, except for divide and square root. */
- case TYPE_FPSTORE:
- if (! SLOW_FP (dep_type))
- return 0;
- return cost;
-
- case TYPE_STORE:
- if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
- return cost;
-
- if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
- /* The dependency between the two instructions is on the data
- that is being stored. Assume that the address of the store
- is not also dependent. */
- return 0;
- return cost;
-
- case TYPE_LOAD:
- case TYPE_SLOAD:
- case TYPE_FPLOAD:
- /* A load does not return data until at least 11 cycles after
- a store to the same location. 3 cycles are accounted for
- in the load latency; add the other 8 here. */
- if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
- {
- /* If the addresses are not equal this may be a false
- dependency because pointer aliasing could not be
- determined. Add only 2 cycles in that case. 2 is
- an arbitrary compromise between 8, which would cause
- the scheduler to generate worse code elsewhere to
- compensate for a dependency which might not really
- exist, and 0. */
- if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
- || GET_CODE (SET_SRC (pat)) != MEM
- || GET_CODE (SET_DEST (dep_pat)) != MEM
- || ! rtx_equal_p (XEXP (SET_SRC (pat), 0),
- XEXP (SET_DEST (dep_pat), 0)))
- return cost + 2;
-
- return cost + 8;
- }
- return cost;
-
- case TYPE_BRANCH:
- /* Compare to branch latency is 0. There is no benefit from
- separating compare and branch. */
- if (dep_type == TYPE_COMPARE)
- return 0;
- /* Floating point compare to branch latency is less than
- compare to conditional move. */
- if (dep_type == TYPE_FPCMP)
- return cost - 1;
- return cost;
-
- case TYPE_FPCMOVE:
- /* FMOVR class instructions can not issue in the same cycle
- or the cycle after an instruction which writes any
- integer register. Model this as cost 2 for dependent
- instructions. */
- if (dep_type == TYPE_IALU
- && cost < 2)
- return 2;
- /* Otherwise check as for integer conditional moves. */
-
- case TYPE_CMOVE:
- /* Conditional moves involving integer registers wait until
- 3 cycles after loads return data. The interlock applies
- to all loads, not just dependent loads, but that is hard
- to model. */
- if (dep_type == TYPE_LOAD || dep_type == TYPE_SLOAD)
- return cost + 3;
- return cost;
-
- default:
- break;
- }
- break;
-
- case REG_DEP_ANTI:
- /* Divide and square root lock destination registers for full latency. */
- if (! SLOW_FP (dep_type))
- return 0;
- break;
-
- case REG_DEP_OUTPUT:
- /* IEU and FPU instruction that have the same destination
- register cannot be grouped together. */
- return cost + 1;
-
- default:
- break;
- }
-
- /* Other costs not accounted for:
- - Single precision floating point loads lock the other half of
- the even/odd register pair.
- - Several hazards associated with ldd/std are ignored because these
- instructions are rarely generated for V9.
- - The floating point pipeline can not have both a single and double
- precision operation active at the same time. Format conversions
- and graphics instructions are given honorary double precision status.
- - call and jmpl are always the first instruction in a group. */
-
- return cost;
-
-#undef SLOW_FP
-}
-
-static int
-sparc_adjust_cost(insn, link, dep, cost)
- rtx insn;
- rtx link;
- rtx dep;
- int cost;
-{
- switch (sparc_cpu)
- {
- case PROCESSOR_SUPERSPARC:
- cost = supersparc_adjust_cost (insn, link, dep, cost);
- break;
- case PROCESSOR_HYPERSPARC:
- case PROCESSOR_SPARCLITE86X:
- cost = hypersparc_adjust_cost (insn, link, dep, cost);
- break;
- case PROCESSOR_ULTRASPARC:
- cost = ultrasparc_adjust_cost (insn, link, dep, cost);
- break;
- default:
- break;
- }
- return cost;
-}
-
-/* This describes the state of the UltraSPARC pipeline during
- instruction scheduling. */
-
-#define TMASK(__x) ((unsigned)1 << ((int)(__x)))
-#define UMASK(__x) ((unsigned)1 << ((int)(__x)))
-
-enum ultra_code { NONE=0, /* no insn at all */
- IEU0, /* shifts and conditional moves */
- IEU1, /* condition code setting insns, calls+jumps */
- IEUN, /* all other single cycle ieu insns */
- LSU, /* loads and stores */
- CTI, /* branches */
- FPM, /* FPU pipeline 1, multiplies and divides */
- FPA, /* FPU pipeline 2, all other operations */
- SINGLE, /* single issue instructions */
- NUM_ULTRA_CODES };
-
-static enum ultra_code ultra_code_from_mask PARAMS ((int));
-static void ultra_schedule_insn PARAMS ((rtx *, rtx *, int, enum ultra_code));
-
-static const char *const ultra_code_names[NUM_ULTRA_CODES] = {
- "NONE", "IEU0", "IEU1", "IEUN", "LSU", "CTI",
- "FPM", "FPA", "SINGLE" };
-
-struct ultrasparc_pipeline_state {
- /* The insns in this group. */
- rtx group[4];
-
- /* The code for each insn. */
- enum ultra_code codes[4];
-
- /* Which insns in this group have been committed by the
- scheduler. This is how we determine how many more
- can issue this cycle. */
- char commit[4];
-
- /* How many insns in this group. */
- char group_size;
-
- /* Mask of free slots still in this group. */
- char free_slot_mask;
-
- /* The slotter uses the following to determine what other
- insn types can still make their way into this group. */
- char contents [NUM_ULTRA_CODES];
- char num_ieu_insns;
-};
-
-#define ULTRA_NUM_HIST 8
-static struct ultrasparc_pipeline_state ultra_pipe_hist[ULTRA_NUM_HIST];
-static int ultra_cur_hist;
-static int ultra_cycles_elapsed;
-
-#define ultra_pipe (ultra_pipe_hist[ultra_cur_hist])
-
-/* Given TYPE_MASK compute the ultra_code it has. */
-static enum ultra_code
-ultra_code_from_mask (type_mask)
- int type_mask;
-{
- if (type_mask & (TMASK (TYPE_SHIFT) | TMASK (TYPE_CMOVE)))
- return IEU0;
- else if (type_mask & (TMASK (TYPE_COMPARE) |
- TMASK (TYPE_CALL) |
- TMASK (TYPE_SIBCALL) |
- TMASK (TYPE_UNCOND_BRANCH)))
- return IEU1;
- else if (type_mask & TMASK (TYPE_IALU))
- return IEUN;
- else if (type_mask & (TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
- TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
- TMASK (TYPE_FPSTORE)))
- return LSU;
- else if (type_mask & (TMASK (TYPE_FPMUL) | TMASK (TYPE_FPDIVS) |
- TMASK (TYPE_FPDIVD) | TMASK (TYPE_FPSQRTS) |
- TMASK (TYPE_FPSQRTD)))
- return FPM;
- else if (type_mask & (TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
- TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)))
- return FPA;
- else if (type_mask & TMASK (TYPE_BRANCH))
- return CTI;
-
- return SINGLE;
-}
-
-/* Check INSN (a conditional move) and make sure that it's
- results are available at this cycle. Return 1 if the
- results are in fact ready. */
-static int
-ultra_cmove_results_ready_p (insn)
- rtx insn;
-{
- struct ultrasparc_pipeline_state *up;
- int entry, slot;
-
- /* If this got dispatched in the previous
- group, the results are not ready. */
- entry = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
- up = &ultra_pipe_hist[entry];
- slot = 4;
- while (--slot >= 0)
- if (up->group[slot] == insn)
- return 0;
-
- return 1;
-}
-
-/* Walk backwards in pipeline history looking for FPU
- operations which use a mode different than FPMODE and
- will create a stall if an insn using FPMODE were to be
- dispatched this cycle. */
-static int
-ultra_fpmode_conflict_exists (fpmode)
- enum machine_mode fpmode;
-{
- int hist_ent;
- int hist_lim;
-
- hist_ent = (ultra_cur_hist - 1) & (ULTRA_NUM_HIST - 1);
- if (ultra_cycles_elapsed < 4)
- hist_lim = ultra_cycles_elapsed;
- else
- hist_lim = 4;
- while (hist_lim > 0)
- {
- struct ultrasparc_pipeline_state *up = &ultra_pipe_hist[hist_ent];
- int slot = 4;
-
- while (--slot >= 0)
- {
- rtx insn = up->group[slot];
- enum machine_mode this_mode;
- rtx pat;
-
- if (! insn
- || GET_CODE (insn) != INSN
- || (pat = PATTERN (insn)) == 0
- || GET_CODE (pat) != SET)
- continue;
-
- this_mode = GET_MODE (SET_DEST (pat));
- if ((this_mode != SFmode
- && this_mode != DFmode)
- || this_mode == fpmode)
- continue;
-
- /* If it is not FMOV, FABS, FNEG, FDIV, or FSQRT then
- we will get a stall. Loads and stores are independent
- of these rules. */
- if (GET_CODE (SET_SRC (pat)) != ABS
- && GET_CODE (SET_SRC (pat)) != NEG
- && ((TMASK (get_attr_type (insn)) &
- (TMASK (TYPE_FPDIVS) | TMASK (TYPE_FPDIVD) |
- TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPSQRTS) |
- TMASK (TYPE_FPSQRTD) |
- TMASK (TYPE_LOAD) | TMASK (TYPE_STORE))) == 0))
- return 1;
- }
- hist_lim--;
- hist_ent = (hist_ent - 1) & (ULTRA_NUM_HIST - 1);
- }
-
- /* No conflicts, safe to dispatch. */
- return 0;
-}
-
-/* Find an instruction in LIST which has one of the
- type attributes enumerated in TYPE_MASK. START
- says where to begin the search.
-
- NOTE: This scheme depends upon the fact that we
- have less than 32 distinct type attributes. */
-
-static int ultra_types_avail;
-
-static rtx *
-ultra_find_type (type_mask, list, start)
- int type_mask;
- rtx *list;
- int start;
-{
- int i;
-
- /* Short circuit if no such insn exists in the ready
- at the moment. */
- if ((type_mask & ultra_types_avail) == 0)
- return 0;
-
- for (i = start; i >= 0; i--)
- {
- rtx insn = list[i];
-
- if (recog_memoized (insn) >= 0
- && (TMASK(get_attr_type (insn)) & type_mask))
- {
- enum machine_mode fpmode = SFmode;
- rtx pat = 0;
- int slot;
- int check_depend = 0;
- int check_fpmode_conflict = 0;
-
- if (GET_CODE (insn) == INSN
- && (pat = PATTERN(insn)) != 0
- && GET_CODE (pat) == SET
- && !(type_mask & (TMASK (TYPE_STORE) |
- TMASK (TYPE_FPSTORE))))
- {
- check_depend = 1;
- if (GET_MODE (SET_DEST (pat)) == SFmode
- || GET_MODE (SET_DEST (pat)) == DFmode)
- {
- fpmode = GET_MODE (SET_DEST (pat));
- check_fpmode_conflict = 1;
- }
- }
-
- slot = 4;
- while(--slot >= 0)
- {
- rtx slot_insn = ultra_pipe.group[slot];
- rtx slot_pat;
-
- /* Already issued, bad dependency, or FPU
- mode conflict. */
- if (slot_insn != 0
- && (slot_pat = PATTERN (slot_insn)) != 0
- && ((insn == slot_insn)
- || (check_depend == 1
- && GET_CODE (slot_insn) == INSN
- && GET_CODE (slot_pat) == SET
- && ((GET_CODE (SET_DEST (slot_pat)) == REG
- && GET_CODE (SET_SRC (pat)) == REG
- && REGNO (SET_DEST (slot_pat)) ==
- REGNO (SET_SRC (pat)))
- || (GET_CODE (SET_DEST (slot_pat)) == SUBREG
- && GET_CODE (SET_SRC (pat)) == SUBREG
- && REGNO (SUBREG_REG (SET_DEST (slot_pat))) ==
- REGNO (SUBREG_REG (SET_SRC (pat)))
- && SUBREG_BYTE (SET_DEST (slot_pat)) ==
- SUBREG_BYTE (SET_SRC (pat)))))
- || (check_fpmode_conflict == 1
- && GET_CODE (slot_insn) == INSN
- && GET_CODE (slot_pat) == SET
- && (GET_MODE (SET_DEST (slot_pat)) == SFmode
- || GET_MODE (SET_DEST (slot_pat)) == DFmode)
- && GET_MODE (SET_DEST (slot_pat)) != fpmode)))
- goto next;
- }
-
- /* Check for peculiar result availability and dispatch
- interference situations. */
- if (pat != 0
- && ultra_cycles_elapsed > 0)
- {
- rtx link;
-
- for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
- {
- rtx link_insn = XEXP (link, 0);
- if (GET_CODE (link_insn) == INSN
- && recog_memoized (link_insn) >= 0
- && (TMASK (get_attr_type (link_insn)) &
- (TMASK (TYPE_CMOVE) | TMASK (TYPE_FPCMOVE)))
- && ! ultra_cmove_results_ready_p (link_insn))
- goto next;
- }
-
- if (check_fpmode_conflict
- && ultra_fpmode_conflict_exists (fpmode))
- goto next;
- }
-
- return &list[i];
- }
- next:
- ;
- }
- return 0;
-}
-
-static void
-ultra_build_types_avail (ready, n_ready)
- rtx *ready;
- int n_ready;
-{
- int i = n_ready - 1;
-
- ultra_types_avail = 0;
- while(i >= 0)
- {
- rtx insn = ready[i];
-
- if (recog_memoized (insn) >= 0)
- ultra_types_avail |= TMASK (get_attr_type (insn));
-
- i -= 1;
- }
-}
-
-/* Place insn pointed to my IP into the pipeline.
- Make element THIS of READY be that insn if it
- is not already. TYPE indicates the pipeline class
- this insn falls into. */
-static void
-ultra_schedule_insn (ip, ready, this, type)
- rtx *ip;
- rtx *ready;
- int this;
- enum ultra_code type;
-{
- int pipe_slot;
- char mask = ultra_pipe.free_slot_mask;
- rtx temp;
-
- /* Obtain free slot. */
- for (pipe_slot = 0; pipe_slot < 4; pipe_slot++)
- if ((mask & (1 << pipe_slot)) != 0)
- break;
- if (pipe_slot == 4)
- abort ();
-
- /* In it goes, and it hasn't been committed yet. */
- ultra_pipe.group[pipe_slot] = *ip;
- ultra_pipe.codes[pipe_slot] = type;
- ultra_pipe.contents[type] = 1;
- if (UMASK (type) &
- (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
- ultra_pipe.num_ieu_insns += 1;
-
- ultra_pipe.free_slot_mask = (mask & ~(1 << pipe_slot));
- ultra_pipe.group_size += 1;
- ultra_pipe.commit[pipe_slot] = 0;
-
- /* Update ready list. */
- temp = *ip;
- while (ip != &ready[this])
- {
- ip[0] = ip[1];
- ++ip;
- }
- *ip = temp;
-}
-
-/* Advance to the next pipeline group. */
-static void
-ultra_flush_pipeline ()
-{
- ultra_cur_hist = (ultra_cur_hist + 1) & (ULTRA_NUM_HIST - 1);
- ultra_cycles_elapsed += 1;
- memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
- ultra_pipe.free_slot_mask = 0xf;
-}
-
-/* Init our data structures for this current block. */
-static void
-ultrasparc_sched_init ()
-{
- memset ((char *) ultra_pipe_hist, 0, sizeof ultra_pipe_hist);
- ultra_cur_hist = 0;
- ultra_cycles_elapsed = 0;
- ultra_pipe.free_slot_mask = 0xf;
-}
-
-static void
-sparc_sched_init (dump, sched_verbose, max_ready)
- FILE *dump ATTRIBUTE_UNUSED;
- int sched_verbose ATTRIBUTE_UNUSED;
- int max_ready ATTRIBUTE_UNUSED;
-{
- if (sparc_cpu == PROCESSOR_ULTRASPARC)
- ultrasparc_sched_init ();
-}
-
-/* INSN has been scheduled, update pipeline commit state
- and return how many instructions are still to be
- scheduled in this group. */
-static int
-ultrasparc_variable_issue (insn)
- rtx insn;
-{
- struct ultrasparc_pipeline_state *up = &ultra_pipe;
- int i, left_to_fire;
-
- left_to_fire = 0;
- for (i = 0; i < 4; i++)
- {
- if (up->group[i] == 0)
- continue;
-
- if (up->group[i] == insn)
- {
- up->commit[i] = 1;
- }
- else if (! up->commit[i])
- left_to_fire++;
- }
-
- return left_to_fire;
-}
-
-static int
-sparc_variable_issue (dump, sched_verbose, insn, cim)
- FILE *dump ATTRIBUTE_UNUSED;
- int sched_verbose ATTRIBUTE_UNUSED;
- rtx insn;
- int cim;
-{
- if (sparc_cpu == PROCESSOR_ULTRASPARC)
- return ultrasparc_variable_issue (insn);
- else
- return cim - 1;
-}
-
-/* In actual_hazard_this_instance, we may have yanked some
- instructions from the ready list due to conflict cost
- adjustments. If so, and such an insn was in our pipeline
- group, remove it and update state. */
-static void
-ultra_rescan_pipeline_state (ready, n_ready)
- rtx *ready;
- int n_ready;
-{
- struct ultrasparc_pipeline_state *up = &ultra_pipe;
- int i;
-
- for (i = 0; i < 4; i++)
- {
- rtx insn = up->group[i];
- int j;
-
- if (! insn)
- continue;
-
- /* If it has been committed, then it was removed from
- the ready list because it was actually scheduled,
- and that is not the case we are searching for here. */
- if (up->commit[i] != 0)
- continue;
-
- for (j = n_ready - 1; j >= 0; j--)
- if (ready[j] == insn)
- break;
-
- /* If we didn't find it, toss it. */
- if (j < 0)
- {
- enum ultra_code ucode = up->codes[i];
-
- up->group[i] = 0;
- up->codes[i] = NONE;
- up->contents[ucode] = 0;
- if (UMASK (ucode) &
- (UMASK (IEUN) | UMASK (IEU0) | UMASK (IEU1)))
- up->num_ieu_insns -= 1;
-
- up->free_slot_mask |= (1 << i);
- up->group_size -= 1;
- up->commit[i] = 0;
- }
- }
-}
-
-static void
-ultrasparc_sched_reorder (dump, sched_verbose, ready, n_ready)
- FILE *dump;
- int sched_verbose;
- rtx *ready;
- int n_ready;
-{
- struct ultrasparc_pipeline_state *up = &ultra_pipe;
- int i, this_insn;
-
- if (sched_verbose)
- {
- int n;
-
- fprintf (dump, "\n;;\tUltraSPARC Looking at [");
- for (n = n_ready - 1; n >= 0; n--)
- {
- rtx insn = ready[n];
- enum ultra_code ucode;
-
- if (recog_memoized (insn) < 0)
- continue;
- ucode = ultra_code_from_mask (TMASK (get_attr_type (insn)));
- if (n != 0)
- fprintf (dump, "%s(%d) ",
- ultra_code_names[ucode],
- INSN_UID (insn));
- else
- fprintf (dump, "%s(%d)",
- ultra_code_names[ucode],
- INSN_UID (insn));
- }
- fprintf (dump, "]\n");
- }
-
- this_insn = n_ready - 1;
-
- /* Skip over junk we don't understand. */
- while ((this_insn >= 0)
- && recog_memoized (ready[this_insn]) < 0)
- this_insn--;
-
- ultra_build_types_avail (ready, this_insn + 1);
-
- while (this_insn >= 0) {
- int old_group_size = up->group_size;
-
- if (up->group_size != 0)
- {
- int num_committed;
-
- num_committed = (up->commit[0] + up->commit[1] +
- up->commit[2] + up->commit[3]);
- /* If nothing has been commited from our group, or all of
- them have. Clear out the (current cycle's) pipeline
- state and start afresh. */
- if (num_committed == 0
- || num_committed == up->group_size)
- {
- ultra_flush_pipeline ();
- up = &ultra_pipe;
- old_group_size = 0;
- }
- else
- {
- /* OK, some ready list insns got requeued and thus removed
- from the ready list. Account for this fact. */
- ultra_rescan_pipeline_state (ready, n_ready);
-
- /* Something "changed", make this look like a newly
- formed group so the code at the end of the loop
- knows that progress was in fact made. */
- if (up->group_size != old_group_size)
- old_group_size = 0;
- }
- }
-
- if (up->group_size == 0)
- {
- /* If the pipeline is (still) empty and we have any single
- group insns, get them out now as this is a good time. */
- rtx *ip = ultra_find_type ((TMASK (TYPE_RETURN) | TMASK (TYPE_IDIV) |
- TMASK (TYPE_IMUL) | TMASK (TYPE_CMOVE) |
- TMASK (TYPE_MULTI) | TMASK (TYPE_MISC)),
- ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, SINGLE);
- break;
- }
-
- /* If we are not in the process of emptying out the pipe, try to
- obtain an instruction which must be the first in it's group. */
- ip = ultra_find_type ((TMASK (TYPE_CALL) |
- TMASK (TYPE_SIBCALL) |
- TMASK (TYPE_CALL_NO_DELAY_SLOT) |
- TMASK (TYPE_UNCOND_BRANCH)),
- ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, IEU1);
- this_insn--;
- }
- else if ((ip = ultra_find_type ((TMASK (TYPE_FPDIVS) |
- TMASK (TYPE_FPDIVD) |
- TMASK (TYPE_FPSQRTS) |
- TMASK (TYPE_FPSQRTD)),
- ready, this_insn)) != 0)
- {
- ultra_schedule_insn (ip, ready, this_insn, FPM);
- this_insn--;
- }
- }
-
- /* Try to fill the integer pipeline. First, look for an IEU0 specific
- operation. We can't do more IEU operations if the first 3 slots are
- all full or we have dispatched two IEU insns already. */
- if ((up->free_slot_mask & 0x7) != 0
- && up->num_ieu_insns < 2
- && up->contents[IEU0] == 0
- && up->contents[IEUN] == 0)
- {
- rtx *ip = ultra_find_type (TMASK(TYPE_SHIFT), ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, IEU0);
- this_insn--;
- }
- }
-
- /* If we can, try to find an IEU1 specific or an unnamed
- IEU instruction. */
- if ((up->free_slot_mask & 0x7) != 0
- && up->num_ieu_insns < 2)
- {
- rtx *ip = ultra_find_type ((TMASK (TYPE_IALU) |
- (up->contents[IEU1] == 0 ? TMASK (TYPE_COMPARE) : 0)),
- ready, this_insn);
- if (ip)
- {
- rtx insn = *ip;
-
- ultra_schedule_insn (ip, ready, this_insn,
- (!up->contents[IEU1]
- && get_attr_type (insn) == TYPE_COMPARE)
- ? IEU1 : IEUN);
- this_insn--;
- }
- }
-
- /* If only one IEU insn has been found, try to find another unnamed
- IEU operation or an IEU1 specific one. */
- if ((up->free_slot_mask & 0x7) != 0
- && up->num_ieu_insns < 2)
- {
- rtx *ip;
- int tmask = TMASK (TYPE_IALU);
-
- if (!up->contents[IEU1])
- tmask |= TMASK (TYPE_COMPARE);
- ip = ultra_find_type (tmask, ready, this_insn);
- if (ip)
- {
- rtx insn = *ip;
-
- ultra_schedule_insn (ip, ready, this_insn,
- (!up->contents[IEU1]
- && get_attr_type (insn) == TYPE_COMPARE)
- ? IEU1 : IEUN);
- this_insn--;
- }
- }
-
- /* Try for a load or store, but such an insn can only be issued
- if it is within' one of the first 3 slots. */
- if ((up->free_slot_mask & 0x7) != 0
- && up->contents[LSU] == 0)
- {
- rtx *ip = ultra_find_type ((TMASK (TYPE_LOAD) | TMASK (TYPE_SLOAD) |
- TMASK (TYPE_STORE) | TMASK (TYPE_FPLOAD) |
- TMASK (TYPE_FPSTORE)), ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, LSU);
- this_insn--;
- }
- }
-
- /* Now find FPU operations, first FPM class. But not divisions or
- square-roots because those will break the group up. Unlike all
- the previous types, these can go in any slot. */
- if (up->free_slot_mask != 0
- && up->contents[FPM] == 0)
- {
- rtx *ip = ultra_find_type (TMASK (TYPE_FPMUL), ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, FPM);
- this_insn--;
- }
- }
-
- /* Continue on with FPA class if we have not filled the group already. */
- if (up->free_slot_mask != 0
- && up->contents[FPA] == 0)
- {
- rtx *ip = ultra_find_type ((TMASK (TYPE_FPMOVE) | TMASK (TYPE_FPCMOVE) |
- TMASK (TYPE_FP) | TMASK (TYPE_FPCMP)),
- ready, this_insn);
- if (ip)
- {
- ultra_schedule_insn (ip, ready, this_insn, FPA);
- this_insn--;
- }
- }
+	  /* If a load, then the dependence must be on the memory address.
+	     If the addresses aren't equal, then it might be a false
+	     dependency.  */
+ if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
+ {
+ if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
+ || GET_CODE (SET_DEST (dep_pat)) != MEM
+ || GET_CODE (SET_SRC (pat)) != MEM
+ || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
+ XEXP (SET_SRC (pat), 0)))
+ return cost + 2;
- /* Finally, maybe stick a branch in here. */
- if (up->free_slot_mask != 0
- && up->contents[CTI] == 0)
- {
- rtx *ip = ultra_find_type (TMASK (TYPE_BRANCH), ready, this_insn);
+ return cost + 8;
+ }
+ break;
- /* Try to slip in a branch only if it is one of the
- next 2 in the ready list. */
- if (ip && ((&ready[this_insn] - ip) < 2))
- {
- ultra_schedule_insn (ip, ready, this_insn, CTI);
- this_insn--;
- }
- }
+ case TYPE_BRANCH:
+ /* Compare to branch latency is 0. There is no benefit from
+ separating compare and branch. */
+ if (dep_type == TYPE_COMPARE)
+ return 0;
+ /* Floating point compare to branch latency is less than
+ compare to conditional move. */
+ if (dep_type == TYPE_FPCMP)
+ return cost - 1;
+ break;
+ default:
+ break;
+ }
+ break;
- up->group_size = 0;
- for (i = 0; i < 4; i++)
- if ((up->free_slot_mask & (1 << i)) == 0)
- up->group_size++;
+ case REG_DEP_ANTI:
+ /* Anti-dependencies only penalize the fpu unit. */
+ if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
+ return 0;
+ break;
- /* See if we made any progress... */
- if (old_group_size != up->group_size)
+ default:
break;
+ }
- /* Clean out the (current cycle's) pipeline state
- and try once more. If we placed no instructions
- into the pipeline at all, it means a real hard
- conflict exists with some earlier issued instruction
- so we must advance to the next cycle to clear it up. */
- if (up->group_size == 0)
- {
- ultra_flush_pipeline ();
- up = &ultra_pipe;
- }
- else
- {
- memset ((char *) &ultra_pipe, 0, sizeof ultra_pipe);
- ultra_pipe.free_slot_mask = 0xf;
- }
- }
+ return cost;
+}
- if (sched_verbose)
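+/* Adjust the cost of a scheduling dependency.  Return the new cost of
+   a dependency LINK of INSN on DEP; COST is the current cost.  */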
+static int
+sparc_adjust_cost (insn, link, dep, cost)
+ rtx insn;
+ rtx link;
+ rtx dep;
+ int cost;
+{
+ switch (sparc_cpu)
{
- int n, gsize;
-
- fprintf (dump, ";;\tUltraSPARC Launched [");
- gsize = up->group_size;
- for (n = 0; n < 4; n++)
- {
- rtx insn = up->group[n];
-
- if (! insn)
- continue;
-
- gsize -= 1;
- if (gsize != 0)
- fprintf (dump, "%s(%d) ",
- ultra_code_names[up->codes[n]],
- INSN_UID (insn));
- else
- fprintf (dump, "%s(%d)",
- ultra_code_names[up->codes[n]],
- INSN_UID (insn));
- }
- fprintf (dump, "]\n");
+ case PROCESSOR_SUPERSPARC:
+ cost = supersparc_adjust_cost (insn, link, dep, cost);
+ break;
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ cost = hypersparc_adjust_cost (insn, link, dep, cost);
+ break;
+ default:
+ break;
}
+ return cost;
}
+static void
+sparc_sched_init (dump, sched_verbose, max_ready)
+ FILE *dump ATTRIBUTE_UNUSED;
+ int sched_verbose ATTRIBUTE_UNUSED;
+ int max_ready ATTRIBUTE_UNUSED;
+{
+}
+
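+/* Return nonzero to make the scheduler use the DFA pipeline
+   description for the current CPU; these are the processors that
+   sparc.md models.  */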
static int
-sparc_sched_reorder (dump, sched_verbose, ready, n_readyp, clock)
- FILE *dump;
- int sched_verbose;
- rtx *ready;
- int *n_readyp;
- int clock ATTRIBUTE_UNUSED;
+sparc_use_dfa_pipeline_interface ()
{
- if (sparc_cpu == PROCESSOR_ULTRASPARC)
- ultrasparc_sched_reorder (dump, sched_verbose, ready, *n_readyp);
- return sparc_issue_rate ();
+ if ((1 << sparc_cpu) &
+ ((1 << PROCESSOR_ULTRASPARC) | (1 << PROCESSOR_CYPRESS) |
+ (1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
+ (1 << PROCESSOR_SPARCLITE86X) | (1 << PROCESSOR_TSC701) |
+ (1 << PROCESSOR_ULTRASPARC3)))
+ return 1;
+ return 0;
+}
+
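+/* Return how many ready insns the scheduler may look at when choosing
+   the next insn to issue: the full issue width of four on the
+   UltraSPARC family, a window of three on the other multi-issue
+   chips, and zero (no lookahead) otherwise.  */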
+static int
+sparc_use_sched_lookahead ()
+{
+ if (sparc_cpu == PROCESSOR_ULTRASPARC
+ || sparc_cpu == PROCESSOR_ULTRASPARC3)
+ return 4;
+ if ((1 << sparc_cpu) &
+ ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
+ (1 << PROCESSOR_SPARCLITE86X)))
+ return 3;
+ return 0;
}
-static int
+static int
sparc_issue_rate ()
{
switch (sparc_cpu)
{
- default:
- return 1;
- case PROCESSOR_V9:
+ default:
+ return 1;
+ case PROCESSOR_V9:
/* Assume V9 processors are capable of at least dual-issue. */
return 2;
- case PROCESSOR_SUPERSPARC:
- return 3;
+ case PROCESSOR_SUPERSPARC:
+ return 3;
case PROCESSOR_HYPERSPARC:
case PROCESSOR_SPARCLITE86X:
return 2;
- case PROCESSOR_ULTRASPARC:
- return 4;
+ case PROCESSOR_ULTRASPARC:
+ case PROCESSOR_ULTRASPARC3:
+ return 4;
}
}
else
return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
-
-
-/* Return 1 if DEST and SRC reference only global and in registers. */
-
-int
-sparc_return_peephole_ok (dest, src)
- rtx dest, src;
-{
- if (! TARGET_V9)
- return 0;
- if (current_function_uses_only_leaf_regs)
- return 0;
- if (GET_CODE (src) != CONST_INT
- && (GET_CODE (src) != REG || ! IN_OR_GLOBAL_P (src)))
- return 0;
- return IN_OR_GLOBAL_P (dest);
-}
\f
-/* Output assembler code to FILE to increment profiler label # LABELNO
- for profiling a function entry.
-
- 32 bit sparc uses %g2 as the STATIC_CHAIN_REGNUM which gets clobbered
- during profiling so we need to save/restore it around the call to mcount.
- We're guaranteed that a save has just been done, and we use the space
- allocated for intreg/fpreg value passing. */
+/* Emit rtl to increment the counter at profiler label LABELNO
+   when profiling a function entry.  */
void
-sparc_function_profiler (file, labelno)
- FILE *file;
+sparc_profile_hook (labelno)
int labelno;
{
char buf[32];
- ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
-
- if (! TARGET_ARCH64)
- fputs ("\tst\t%g2, [%fp-4]\n", file);
-
- fputs ("\tsethi\t%hi(", file);
- assemble_name (file, buf);
- fputs ("), %o0\n", file);
-
- fputs ("\tcall\t", file);
- assemble_name (file, MCOUNT_FUNCTION);
- putc ('\n', file);
-
- fputs ("\t or\t%o0, %lo(", file);
- assemble_name (file, buf);
- fputs ("), %o0\n", file);
-
- if (! TARGET_ARCH64)
- fputs ("\tld\t[%fp-4], %g2\n", file);
-}
-
-
-/* Mark ARG, which is really a struct ultrasparc_pipline_state *, for
- GC. */
+ rtx lab, fun;
-static void
-mark_ultrasparc_pipeline_state (arg)
- void *arg;
-{
- struct ultrasparc_pipeline_state *ups;
- size_t i;
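+  /* Copy the label name into GC-managed storage: the SYMBOL_REF only
+     points at the string, which must outlive this function.  */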
+ ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
+ lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
- ups = (struct ultrasparc_pipeline_state *) arg;
- for (i = 0; i < sizeof (ups->group) / sizeof (rtx); ++i)
- ggc_mark_rtx (ups->group[i]);
+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lab, Pmode);
}
-
+\f
/* Called to register all of our global variables with the garbage
collector. */
{
ggc_add_rtx_root (&sparc_compare_op0, 1);
ggc_add_rtx_root (&sparc_compare_op1, 1);
- ggc_add_rtx_root (&leaf_label, 1);
ggc_add_rtx_root (&global_offset_table, 1);
ggc_add_rtx_root (&get_pc_symbol, 1);
ggc_add_rtx_root (&sparc_addr_diff_list, 1);
ggc_add_rtx_root (&sparc_addr_list, 1);
- ggc_add_root (ultra_pipe_hist, ARRAY_SIZE (ultra_pipe_hist),
- sizeof (ultra_pipe_hist[0]), &mark_ultrasparc_pipeline_state);
}
#ifdef OBJECT_FORMAT_ELF
fputc ('\n', asm_out_file);
}
#endif /* OBJECT_FORMAT_ELF */
+
+/* ??? Similar to the standard section selection, but force reloc-y-ness
+ if SUNOS4_SHARED_LIBRARIES. Unclear why this helps (as opposed to
+ pretending PIC always on), but that's what the old code did. */
+
+static void
+sparc_aout_select_section (t, reloc, align)
+ tree t;
+ int reloc;
+ unsigned HOST_WIDE_INT align;
+{
+  default_select_section (t, reloc | SUNOS4_SHARED_LIBRARIES, align);
+}
+
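+/* Check OP against extra constraint letter C.  'Q', 'R' and 'S' accept
+   floating point constants loadable via sethi, a single move, or a
+   sethi/or pair; 'U' accepts a register pair usable by ldd; 'T' and 'W'
+   accept memory references, validated below so as to mimic 'm' for
+   reload's benefit.  STRICT requests strict register checking.  */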
+int
+sparc_extra_constraint_check (op, c, strict)
+ rtx op;
+ int c;
+ int strict;
+{
+ int reload_ok_mem;
+
+ if (TARGET_ARCH64
+ && (c == 'T' || c == 'U'))
+ return 0;
+
+ switch (c)
+ {
+ case 'Q':
+ return fp_sethi_p (op);
+
+ case 'R':
+ return fp_mov_p (op);
+
+ case 'S':
+ return fp_high_losum_p (op);
+
+ case 'U':
+ if (! strict
+ || (GET_CODE (op) == REG
+ && (REGNO (op) < FIRST_PSEUDO_REGISTER
+ || reg_renumber[REGNO (op)] >= 0)))
+ return register_ok_for_ldd (op);
+
+ return 0;
+
+ case 'W':
+ case 'T':
+ break;
+
+ default:
+ return 0;
+ }
+
+ /* Our memory extra constraints have to emulate the
+ behavior of 'm' and 'o' in order for reload to work
+ correctly. */
+ if (GET_CODE (op) == MEM)
+ {
+ reload_ok_mem = 0;
+ if ((TARGET_ARCH64 || mem_min_alignment (op, 8))
+ && (! strict
+ || strict_memory_address_p (Pmode, XEXP (op, 0))))
+ reload_ok_mem = 1;
+ }
+ else
+ {
+ reload_ok_mem = (reload_in_progress
+ && GET_CODE (op) == REG
+ && REGNO (op) >= FIRST_PSEUDO_REGISTER
+ && reg_renumber [REGNO (op)] < 0);
+ }
+
+ return reload_ok_mem;
+}
+
+/* ??? This duplicates information provided to the compiler by the
+ ??? scheduler description. Some day, teach genautomata to output
+ ??? the latencies and then CSE will just use that. */
+
+int
+sparc_rtx_costs (x, code, outer_code)
+ rtx x;
+ enum rtx_code code, outer_code;
+{
+ switch (code)
+ {
+ case PLUS: case MINUS: case ABS: case NEG:
+ case FLOAT: case UNSIGNED_FLOAT:
+ case FIX: case UNSIGNED_FIX:
+ case FLOAT_EXTEND: case FLOAT_TRUNCATE:
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ {
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_ULTRASPARC:
+ case PROCESSOR_ULTRASPARC3:
+ return COSTS_N_INSNS (4);
+
+ case PROCESSOR_SUPERSPARC:
+ return COSTS_N_INSNS (3);
+
+ case PROCESSOR_CYPRESS:
+ return COSTS_N_INSNS (5);
+
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ default:
+ return COSTS_N_INSNS (1);
+ }
+ }
+
+ return COSTS_N_INSNS (1);
+
+ case SQRT:
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_ULTRASPARC:
+ if (GET_MODE (x) == SFmode)
+ return COSTS_N_INSNS (13);
+ else
+ return COSTS_N_INSNS (23);
+
+ case PROCESSOR_ULTRASPARC3:
+ if (GET_MODE (x) == SFmode)
+ return COSTS_N_INSNS (20);
+ else
+ return COSTS_N_INSNS (29);
+
+ case PROCESSOR_SUPERSPARC:
+ return COSTS_N_INSNS (12);
+
+ case PROCESSOR_CYPRESS:
+ return COSTS_N_INSNS (63);
+
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ return COSTS_N_INSNS (17);
+
+ default:
+ return COSTS_N_INSNS (30);
+ }
+
+ case COMPARE:
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ {
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_ULTRASPARC:
+ case PROCESSOR_ULTRASPARC3:
+ return COSTS_N_INSNS (1);
+
+ case PROCESSOR_SUPERSPARC:
+ return COSTS_N_INSNS (3);
+
+ case PROCESSOR_CYPRESS:
+ return COSTS_N_INSNS (5);
+
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ default:
+ return COSTS_N_INSNS (1);
+ }
+ }
+
+ /* ??? Maybe mark integer compares as zero cost on
+ ??? all UltraSPARC processors because the result
+ ??? can be bypassed to a branch in the same group. */
+
+ return COSTS_N_INSNS (1);
+
+ case MULT:
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ {
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_ULTRASPARC:
+ case PROCESSOR_ULTRASPARC3:
+ return COSTS_N_INSNS (4);
+
+ case PROCESSOR_SUPERSPARC:
+ return COSTS_N_INSNS (3);
+
+ case PROCESSOR_CYPRESS:
+ return COSTS_N_INSNS (7);
+
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ return COSTS_N_INSNS (1);
+
+ default:
+ return COSTS_N_INSNS (5);
+ }
+ }
+
+      /* The latency is actually variable for Ultra-I/II.  If one of
+	 the inputs has a known constant value, we could calculate this
+	 precisely.
+
+ However, for that to be useful we would need to
+ add some machine description changes which would
+ make sure small constants ended up in rs1 of the
+ multiply instruction. This is because the multiply
+ latency is determined by the number of clear (or
+ set if the value is negative) bits starting from
+ the most significant bit of the first input.
+
+ The algorithm for computing num_cycles of a multiply
+ on Ultra-I/II is:
+
+ if (rs1 < 0)
+ highest_bit = highest_clear_bit(rs1);
+ else
+ highest_bit = highest_set_bit(rs1);
+	if (highest_bit < 3)
+	  highest_bit = 3;
+ num_cycles = 4 + ((highest_bit - 3) / 2);
+
+ If we did that we would have to also consider register
+ allocation issues that would result from forcing such
+ a value into a register.
+
+ There are other similar tricks we could play if we
+ knew, for example, that one input was an array index.
+
+ Since we do not play any such tricks currently the
+ safest thing to do is report the worst case latency. */
+ if (sparc_cpu == PROCESSOR_ULTRASPARC)
+ return (GET_MODE (x) == DImode ?
+ COSTS_N_INSNS (34) : COSTS_N_INSNS (19));
+
+ /* Multiply latency on Ultra-III, fortunately, is constant. */
+ if (sparc_cpu == PROCESSOR_ULTRASPARC3)
+ return COSTS_N_INSNS (6);
+
+ if (sparc_cpu == PROCESSOR_HYPERSPARC
+ || sparc_cpu == PROCESSOR_SPARCLITE86X)
+ return COSTS_N_INSNS (17);
+
+ return (TARGET_HARD_MUL
+ ? COSTS_N_INSNS (5)
+ : COSTS_N_INSNS (25));
+
+ case DIV:
+ case UDIV:
+ case MOD:
+ case UMOD:
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ {
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_ULTRASPARC:
+ if (GET_MODE (x) == SFmode)
+ return COSTS_N_INSNS (13);
+ else
+ return COSTS_N_INSNS (23);
+
+ case PROCESSOR_ULTRASPARC3:
+ if (GET_MODE (x) == SFmode)
+ return COSTS_N_INSNS (17);
+ else
+ return COSTS_N_INSNS (20);
+
+ case PROCESSOR_SUPERSPARC:
+ if (GET_MODE (x) == SFmode)
+ return COSTS_N_INSNS (6);
+ else
+ return COSTS_N_INSNS (9);
+
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ if (GET_MODE (x) == SFmode)
+ return COSTS_N_INSNS (8);
+ else
+ return COSTS_N_INSNS (12);
+
+ default:
+ return COSTS_N_INSNS (7);
+ }
+ }
+
+ if (sparc_cpu == PROCESSOR_ULTRASPARC)
+ return (GET_MODE (x) == DImode ?
+ COSTS_N_INSNS (68) : COSTS_N_INSNS (37));
+ if (sparc_cpu == PROCESSOR_ULTRASPARC3)
+ return (GET_MODE (x) == DImode ?
+ COSTS_N_INSNS (71) : COSTS_N_INSNS (40));
+ return COSTS_N_INSNS (25);
+
+ case IF_THEN_ELSE:
+ /* Conditional moves. */
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_ULTRASPARC:
+ return COSTS_N_INSNS (2);
+
+ case PROCESSOR_ULTRASPARC3:
+ if (FLOAT_MODE_P (GET_MODE (x)))
+ return COSTS_N_INSNS (3);
+ else
+ return COSTS_N_INSNS (2);
+
+ default:
+ return COSTS_N_INSNS (1);
+ }
+
+ case MEM:
+ /* If outer-code is SIGN/ZERO extension we have to subtract
+ out COSTS_N_INSNS (1) from whatever we return in determining
+ the cost. */
+ switch (sparc_cpu)
+ {
+ case PROCESSOR_ULTRASPARC:
+ if (outer_code == ZERO_EXTEND)
+ return COSTS_N_INSNS (1);
+ else
+ return COSTS_N_INSNS (2);
+
+ case PROCESSOR_ULTRASPARC3:
+ if (outer_code == ZERO_EXTEND)
+ {
+ if (GET_MODE (x) == QImode
+ || GET_MODE (x) == HImode
+ || outer_code == SIGN_EXTEND)
+ return COSTS_N_INSNS (2);
+ else
+ return COSTS_N_INSNS (1);
+ }
+ else
+ {
+ /* This handles sign extension (3 cycles)
+ and everything else (2 cycles). */
+ return COSTS_N_INSNS (2);
+ }
+
+ case PROCESSOR_SUPERSPARC:
+ if (FLOAT_MODE_P (GET_MODE (x))
+ || outer_code == ZERO_EXTEND
+ || outer_code == SIGN_EXTEND)
+ return COSTS_N_INSNS (0);
+ else
+ return COSTS_N_INSNS (1);
+
+ case PROCESSOR_TSC701:
+ if (outer_code == ZERO_EXTEND
+ || outer_code == SIGN_EXTEND)
+ return COSTS_N_INSNS (2);
+ else
+ return COSTS_N_INSNS (3);
+
+ case PROCESSOR_CYPRESS:
+ if (outer_code == ZERO_EXTEND
+ || outer_code == SIGN_EXTEND)
+ return COSTS_N_INSNS (1);
+ else
+ return COSTS_N_INSNS (2);
+
+ case PROCESSOR_HYPERSPARC:
+ case PROCESSOR_SPARCLITE86X:
+ default:
+ if (outer_code == ZERO_EXTEND
+ || outer_code == SIGN_EXTEND)
+ return COSTS_N_INSNS (0);
+ else
+ return COSTS_N_INSNS (1);
+ }
+
+ case CONST_INT:
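+      /* Constants in the 13-bit signed immediate range are free.  */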
+ if (INTVAL (x) < 0x1000 && INTVAL (x) >= -0x1000)
+ return 0;
+
+ /* fallthru */
+ case HIGH:
+ return 2;
+
+ case CONST:
+ case LABEL_REF:
+ case SYMBOL_REF:
+ return 4;
+
+ case CONST_DOUBLE:
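+      /* A DImode constant is likewise free when its two halves
+	 (XINT fields 3 and 2 here) collapse to a simm13 value.  */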
+ if (GET_MODE (x) == DImode)
+ if ((XINT (x, 3) == 0
+ && (unsigned) XINT (x, 2) < 0x1000)
+ || (XINT (x, 3) == -1
+ && XINT (x, 2) < 0
+ && XINT (x, 2) >= -0x1000))
+ return 0;
+ return 8;
+
+ default:
+      abort ();
+    }
+}