#include "opts.h"
/* Processor costs */
+
+struct processor_costs {
+ /* Integer load */
+ const int int_load;
+
+ /* Integer signed load */
+ const int int_sload;
+
+ /* Integer zeroed load */
+ const int int_zload;
+
+ /* Float load */
+ const int float_load;
+
+ /* fmov, fneg, fabs */
+ const int float_move;
+
+ /* fadd, fsub */
+ const int float_plusminus;
+
+ /* fcmp */
+ const int float_cmp;
+
+ /* fmov, fmovr */
+ const int float_cmove;
+
+ /* fmul */
+ const int float_mul;
+
+ /* fdivs */
+ const int float_div_sf;
+
+ /* fdivd */
+ const int float_div_df;
+
+ /* fsqrts */
+ const int float_sqrt_sf;
+
+ /* fsqrtd */
+ const int float_sqrt_df;
+
+ /* umul/smul */
+ const int int_mul;
+
+ /* mulX */
+ const int int_mulX;
+
+ /* integer multiply cost for each bit set past the most
+ significant 3, so the formula for multiply cost becomes:
+
+ if (rs1 < 0)
+ highest_bit = highest_clear_bit(rs1);
+ else
+ highest_bit = highest_set_bit(rs1);
+ if (highest_bit < 3)
+ highest_bit = 3;
+ cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
+
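+ For example, assuming a hypothetical int_mul_bit_factor of 2,
+ multiplying by rs1 = 0x1234 (highest set bit 12) would cost
+ int_mul + (12 - 3) / 2, i.e. int_mul + 4.
+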
+ A value of zero indicates that the multiply cost is fixed
+ rather than variable. */
+ const int int_mul_bit_factor;
+
+ /* udiv/sdiv */
+ const int int_div;
+
+ /* divX */
+ const int int_divX;
+
+ /* movcc, movr */
+ const int int_cmove;
+
+ /* penalty for shifts, due to scheduling rules etc. */
+ const int shift_penalty;
+};
+
static const
struct processor_costs cypress_costs = {
COSTS_N_INSNS (2), /* int load */
COSTS_N_INSNS (5), /* imul */
COSTS_N_INSNS (5), /* imulX */
0, /* imul bit factor */
- COSTS_N_INSNS (31), /* idiv, average of 12 - 41 cycle range */
- COSTS_N_INSNS (31), /* idivX, average of 12 - 41 cycle range */
+ COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
+ COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
+ COSTS_N_INSNS (1), /* movcc/movr */
+ 0, /* shift penalty */
+};
+
+static const
+struct processor_costs niagara3_costs = {
+ COSTS_N_INSNS (3), /* int load */
+ COSTS_N_INSNS (3), /* int signed load */
+ COSTS_N_INSNS (3), /* int zeroed load */
+ COSTS_N_INSNS (3), /* float load */
+ COSTS_N_INSNS (9), /* fmov, fneg, fabs */
+ COSTS_N_INSNS (9), /* fadd, fsub */
+ COSTS_N_INSNS (9), /* fcmp */
+ COSTS_N_INSNS (9), /* fmov, fmovr */
+ COSTS_N_INSNS (9), /* fmul */
+ COSTS_N_INSNS (23), /* fdivs */
+ COSTS_N_INSNS (37), /* fdivd */
+ COSTS_N_INSNS (23), /* fsqrts */
+ COSTS_N_INSNS (37), /* fsqrtd */
+ COSTS_N_INSNS (9), /* imul */
+ COSTS_N_INSNS (9), /* imulX */
+ 0, /* imul bit factor */
+ COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
+ COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
COSTS_N_INSNS (1), /* movcc/movr */
0, /* shift penalty */
};
-const struct processor_costs *sparc_costs = &cypress_costs;
+static const struct processor_costs *sparc_costs = &cypress_costs;
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
72, 73, 74, 75, 76, 77, 78, 79,
80, 81, 82, 83, 84, 85, 86, 87,
88, 89, 90, 91, 92, 93, 94, 95,
- 96, 97, 98, 99, 100};
+ 96, 97, 98, 99, 100, 101, 102};
/* Vector, indexed by hard register number, which contains 1
for a register that is allowable in a candidate for leaf
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1};
+ 1, 1, 1, 1, 1, 1, 1};
struct GTY(()) machine_function
{
HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
HOST_WIDE_INT, const_tree);
+static void sparc_reorg (void);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (enum machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static int get_some_local_dynamic_name_1 (rtx *, void *);
static int sparc_register_move_cost (enum machine_mode,
reg_class_t, reg_class_t);
-static bool sparc_rtx_costs (rtx, int, int, int *, bool);
+static bool sparc_rtx_costs (rtx, int, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (enum machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, rtx);
+static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
+ enum machine_mode,
+ secondary_reload_info *);
\f
#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes. */
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG sparc_reorg
+
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
+#undef TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
+
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
struct gcc_target targetm = TARGET_INITIALIZER;
+static void
+dump_target_flag_bits (const int flags)
+{
+ if (flags & MASK_64BIT)
+ fprintf (stderr, "64BIT ");
+ if (flags & MASK_APP_REGS)
+ fprintf (stderr, "APP_REGS ");
+ if (flags & MASK_FASTER_STRUCTS)
+ fprintf (stderr, "FASTER_STRUCTS ");
+ if (flags & MASK_FLAT)
+ fprintf (stderr, "FLAT ");
+ if (flags & MASK_FMAF)
+ fprintf (stderr, "FMAF ");
+ if (flags & MASK_FPU)
+ fprintf (stderr, "FPU ");
+ if (flags & MASK_HARD_QUAD)
+ fprintf (stderr, "HARD_QUAD ");
+ if (flags & MASK_POPC)
+ fprintf (stderr, "POPC ");
+ if (flags & MASK_PTR64)
+ fprintf (stderr, "PTR64 ");
+ if (flags & MASK_STACK_BIAS)
+ fprintf (stderr, "STACK_BIAS ");
+ if (flags & MASK_UNALIGNED_DOUBLES)
+ fprintf (stderr, "UNALIGNED_DOUBLES ");
+ if (flags & MASK_V8PLUS)
+ fprintf (stderr, "V8PLUS ");
+ if (flags & MASK_VIS)
+ fprintf (stderr, "VIS ");
+ if (flags & MASK_VIS2)
+ fprintf (stderr, "VIS2 ");
+ if (flags & MASK_VIS3)
+ fprintf (stderr, "VIS3 ");
+ if (flags & MASK_DEPRECATED_V8_INSNS)
+ fprintf (stderr, "DEPRECATED_V8_INSNS ");
+ if (flags & MASK_SPARCLET)
+ fprintf (stderr, "SPARCLET ");
+ if (flags & MASK_SPARCLITE)
+ fprintf (stderr, "SPARCLITE ");
+ if (flags & MASK_V8)
+ fprintf (stderr, "V8 ");
+ if (flags & MASK_V9)
+ fprintf (stderr, "V9 ");
+}
+
+static void
+dump_target_flags (const char *prefix, const int flags)
+{
+ fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
+ dump_target_flag_bits (flags);
+ fprintf (stderr, "]\n");
+}
+
/* Validate and override various options, and do some machine dependent
initialization. */
{ TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
{ TARGET_CPU_niagara, PROCESSOR_NIAGARA },
{ TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
+ { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
+ { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
{ -1, PROCESSOR_V7 }
};
const struct cpu_default *def;
/* Table of values for -m{cpu,tune}=. This must match the order of
the PROCESSOR_* enumeration. */
static struct cpu_table {
+ const char *const name;
const int disable;
const int enable;
} const cpu_table[] = {
- { MASK_ISA, 0 },
- { MASK_ISA, 0 },
- { MASK_ISA, MASK_V8 },
+ { "v7", MASK_ISA, 0 },
+ { "cypress", MASK_ISA, 0 },
+ { "v8", MASK_ISA, MASK_V8 },
/* TI TMS390Z55 supersparc */
- { MASK_ISA, MASK_V8 },
- { MASK_ISA, MASK_V8|MASK_FPU },
+ { "supersparc", MASK_ISA, MASK_V8 },
+ { "hypersparc", MASK_ISA, MASK_V8|MASK_FPU },
/* LEON */
- { MASK_ISA, MASK_V8|MASK_FPU },
- { MASK_ISA, MASK_SPARCLITE },
+ { "leon", MASK_ISA, MASK_V8|MASK_FPU },
+ { "sparclite", MASK_ISA, MASK_SPARCLITE },
/* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
- { MASK_ISA|MASK_FPU, MASK_SPARCLITE },
+ { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
/* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
- { MASK_ISA, MASK_SPARCLITE|MASK_FPU },
- { MASK_ISA|MASK_FPU, MASK_SPARCLITE },
- { MASK_ISA, MASK_SPARCLET },
+ { "f934", MASK_ISA, MASK_SPARCLITE|MASK_FPU },
+ { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
+ { "sparclet", MASK_ISA, MASK_SPARCLET },
/* TEMIC sparclet */
- { MASK_ISA, MASK_SPARCLET },
- { MASK_ISA, MASK_V9 },
+ { "tsc701", MASK_ISA, MASK_SPARCLET },
+ { "v9", MASK_ISA, MASK_V9 },
/* UltraSPARC I, II, IIi */
- { MASK_ISA,
+ { "ultrasparc", MASK_ISA,
/* Although insns using %y are deprecated, it is a clear win. */
- MASK_V9|MASK_DEPRECATED_V8_INSNS},
+ MASK_V9|MASK_DEPRECATED_V8_INSNS },
/* UltraSPARC III */
/* ??? Check if %y issue still holds true. */
- { MASK_ISA,
- MASK_V9|MASK_DEPRECATED_V8_INSNS},
+ { "ultrasparc3", MASK_ISA,
+ MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
/* UltraSPARC T1 */
- { MASK_ISA,
- MASK_V9|MASK_DEPRECATED_V8_INSNS},
+ { "niagara", MASK_ISA,
+ MASK_V9|MASK_DEPRECATED_V8_INSNS },
/* UltraSPARC T2 */
- { MASK_ISA, MASK_V9},
+ { "niagara2", MASK_ISA,
+ MASK_V9|MASK_POPC|MASK_VIS2 },
+ /* UltraSPARC T3 */
+ { "niagara3", MASK_ISA,
+ MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
+ /* UltraSPARC T4 */
+ { "niagara4", MASK_ISA,
+ MASK_V9|MASK_POPC|MASK_VIS2|MASK_VIS3|MASK_FMAF },
};
const struct cpu_table *cpu;
unsigned int i;
int fpu;
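+ /* Parse the -mdebug string: a comma-separated list of debug switches,
+ each optionally prefixed with '!' to clear instead of set its bit;
+ for instance -mdebug=all,!options (illustrative spelling). */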
+ if (sparc_debug_string != NULL)
+ {
+ const char *q;
+ char *p;
+
+ p = ASTRDUP (sparc_debug_string);
+ while ((q = strtok (p, ",")) != NULL)
+ {
+ bool invert;
+ int mask = 0; /* Stays defined if the switch is unknown. */
+
+ p = NULL;
+ if (*q == '!')
+ {
+ invert = true;
+ q++;
+ }
+ else
+ invert = false;
+
+ if (! strcmp (q, "all"))
+ mask = MASK_DEBUG_ALL;
+ else if (! strcmp (q, "options"))
+ mask = MASK_DEBUG_OPTIONS;
+ else
+ error ("unknown -mdebug-%s switch", q);
+
+ if (invert)
+ sparc_debug &= ~mask;
+ else
+ sparc_debug |= mask;
+ }
+ }
+
+ if (TARGET_DEBUG_OPTIONS)
+ {
+ dump_target_flags("Initial target_flags", target_flags);
+ dump_target_flags("target_flags_explicit", target_flags_explicit);
+ }
+
#ifdef SUBTARGET_OVERRIDE_OPTIONS
SUBTARGET_OVERRIDE_OPTIONS;
#endif
gcc_assert (def->cpu != -1);
sparc_cpu_and_features = def->processor;
}
+
if (!global_options_set.x_sparc_cpu)
sparc_cpu = sparc_cpu_and_features;
cpu = &cpu_table[(int) sparc_cpu_and_features];
+
+ if (TARGET_DEBUG_OPTIONS)
+ {
+ fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
+ fprintf (stderr, "sparc_cpu: %s\n",
+ cpu_table[(int) sparc_cpu].name);
+ dump_target_flags ("cpu->disable", cpu->disable);
+ dump_target_flags ("cpu->enable", cpu->enable);
+ }
+
target_flags &= ~cpu->disable;
- target_flags |= cpu->enable;
+ target_flags |= (cpu->enable
+#ifndef HAVE_AS_FMAF_HPC_VIS3
+ & ~(MASK_FMAF | MASK_VIS3)
+#endif
+ );
/* If -mfpu or -mno-fpu was explicitly used, don't override with
the processor default. */
if (target_flags_explicit & MASK_FPU)
target_flags = (target_flags & ~MASK_FPU) | fpu;
- /* Don't allow -mvis if FPU is disabled. */
+ /* -mvis2 implies -mvis */
+ if (TARGET_VIS2)
+ target_flags |= MASK_VIS;
+
+ /* -mvis3 implies -mvis2 and -mvis */
+ if (TARGET_VIS3)
+ target_flags |= MASK_VIS2 | MASK_VIS;
+
+ /* Don't allow -mvis, -mvis2, -mvis3, or -mfmaf if FPU is disabled. */
if (! TARGET_FPU)
- target_flags &= ~MASK_VIS;
+ target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_FMAF);
/* -mvis assumes UltraSPARC+, so we are sure v9 instructions
are available.
target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
}
+ /* -mvis also implies -mv8plus on 32-bit */
+ if (TARGET_VIS && ! TARGET_ARCH64)
+ target_flags |= MASK_V8PLUS;
+
/* Use the deprecated v8 insns for sparc64 in 32 bit mode. */
if (TARGET_V9 && TARGET_ARCH32)
target_flags |= MASK_DEPRECATED_V8_INSNS;
&& (sparc_cpu == PROCESSOR_ULTRASPARC
|| sparc_cpu == PROCESSOR_ULTRASPARC3
|| sparc_cpu == PROCESSOR_NIAGARA
- || sparc_cpu == PROCESSOR_NIAGARA2))
+ || sparc_cpu == PROCESSOR_NIAGARA2
+ || sparc_cpu == PROCESSOR_NIAGARA3
+ || sparc_cpu == PROCESSOR_NIAGARA4))
align_functions = 32;
/* Validate PCC_STRUCT_RETURN. */
case PROCESSOR_NIAGARA2:
sparc_costs = &niagara2_costs;
break;
+ case PROCESSOR_NIAGARA3:
+ case PROCESSOR_NIAGARA4:
+ sparc_costs = &niagara3_costs;
+ break;
+ case PROCESSOR_NATIVE:
+ gcc_unreachable ();
};
+ if (sparc_memory_model == SMM_DEFAULT)
+ {
+ /* Choose the memory model for the operating system. */
+ enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
+ if (os_default != SMM_DEFAULT)
+ sparc_memory_model = os_default;
+ /* Choose the most relaxed model for the processor. */
+ else if (TARGET_V9)
+ sparc_memory_model = SMM_RMO;
+ else if (TARGET_V8)
+ sparc_memory_model = SMM_PSO;
+ else
+ sparc_memory_model = SMM_SC;
+ }
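+
+ /* Solaris, for instance, is expected to pin the model to SMM_TSO via
+ SUBTARGET_DEFAULT_MEMORY_MODEL; the TARGET_V9/TARGET_V8 defaults above
+ apply only when the OS header leaves it at SMM_DEFAULT. */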
+
#ifdef TARGET_DEFAULT_LONG_DOUBLE_128
if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
target_flags |= MASK_LONG_DOUBLE_128;
#endif
+ if (TARGET_DEBUG_OPTIONS)
+ dump_target_flags ("Final target_flags", target_flags);
+
maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
((sparc_cpu == PROCESSOR_ULTRASPARC
|| sparc_cpu == PROCESSOR_NIAGARA
- || sparc_cpu == PROCESSOR_NIAGARA2)
+ || sparc_cpu == PROCESSOR_NIAGARA2
+ || sparc_cpu == PROCESSOR_NIAGARA3
+ || sparc_cpu == PROCESSOR_NIAGARA4)
? 2
: (sparc_cpu == PROCESSOR_ULTRASPARC3
? 8 : 3)),
((sparc_cpu == PROCESSOR_ULTRASPARC
|| sparc_cpu == PROCESSOR_ULTRASPARC3
|| sparc_cpu == PROCESSOR_NIAGARA
- || sparc_cpu == PROCESSOR_NIAGARA2)
+ || sparc_cpu == PROCESSOR_NIAGARA2
+ || sparc_cpu == PROCESSOR_NIAGARA3
+ || sparc_cpu == PROCESSOR_NIAGARA4)
? 64 : 32),
global_options.x_param_values,
global_options_set.x_param_values);
if (operands [1] == const0_rtx)
operands[1] = CONST0_RTX (mode);
- /* We can clear FP registers if TARGET_VIS, and always other regs. */
+ /* We can clear or set to all-ones FP registers if TARGET_VIS, and
+ always other regs. */
if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
- && const_zero_operand (operands[1], mode))
+ && (const_zero_operand (operands[1], mode)
+ || const_all_ones_operand (operands[1], mode)))
return false;
if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
&& (mode == SFmode
/* And any DF constant in integer registers. */
|| (mode == DFmode
- && (reload_completed || reload_in_progress))))
+ && ! can_create_pseudo_p ())))
return false;
operands[1] = force_const_mem (mode, operands[1]);
sparc_emit_set_const32 (rtx op0, rtx op1)
{
enum machine_mode mode = GET_MODE (op0);
- rtx temp;
+ rtx temp = op0;
- if (reload_in_progress || reload_completed)
- temp = op0;
- else
+ if (can_create_pseudo_p ())
temp = gen_reg_rtx (mode);
if (GET_CODE (op1) == CONST_INT)
unsigned HOST_WIDE_INT high_bits,
unsigned HOST_WIDE_INT low_bits)
{
- rtx sub_temp;
+ rtx sub_temp = op0;
- if (reload_in_progress || reload_completed)
- sub_temp = op0;
- else
+ if (can_create_pseudo_p ())
sub_temp = gen_reg_rtx (DImode);
if ((high_bits & 0xfffffc00) != 0)
sub_temp = temp;
}
- if (!reload_in_progress && !reload_completed)
+ if (can_create_pseudo_p ())
{
rtx temp2 = gen_reg_rtx (DImode);
rtx temp3 = gen_reg_rtx (DImode);
&& (GET_CODE (op0) == SUBREG
|| (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
- if (reload_in_progress || reload_completed)
+ if (! can_create_pseudo_p ())
temp = op0;
if (GET_CODE (op1) != CONST_INT)
}
else if (GET_MODE (x) == DImode)
{
- rtx pat = gen_seqdi_special (operands[0], x, y);
+ rtx pat = gen_seqdi_special (operands[0], x, y);
emit_insn (pat);
return true;
}
}
else if (GET_MODE (x) == DImode)
{
- rtx pat = gen_snedi_special (operands[0], x, y);
+ rtx pat;
+ if (TARGET_VIS3)
+ pat = gen_snedi_special_vis3 (operands[0], x, y);
+ else
+ pat = gen_snedi_special (operands[0], x, y);
emit_insn (pat);
return true;
}
}
- /* For the rest, on v9 we can use conditional moves. */
-
- if (TARGET_V9)
- {
- if (gen_v9_scc (operands[0], code, x, y))
- return true;
- }
+ if (TARGET_V9
+ && TARGET_ARCH64
+ && GET_MODE (x) == DImode
+ && !(TARGET_VIS3
+ && (code == GTU || code == LTU))
+ && gen_v9_scc (operands[0], code, x, y))
+ return true;
/* We can do LTU and GEU using the addx/subx instructions too. And
for GTU/LEU, if both operands are registers swap them and fall
}
}
- if (code == LTU || code == GEU)
+ if (code == LTU
+ || (!TARGET_VIS3 && code == GEU))
{
emit_insn (gen_rtx_SET (VOIDmode, operands[0],
gen_rtx_fmt_ee (code, SImode,
return true;
}
+ /* All the possibilities to use addx/subx based sequences have been
+ exhausted, so try for a 3-instruction sequence using v9 conditional
+ moves. */
+ if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
+ return true;
+
/* Nope, do branches. */
return false;
}
int regno = REGNO (reg);
if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
- regno += (TARGET_ARCH64 && regno < 32) ? 1 : 2;
+ regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
return gen_rtx_REG (DFmode, regno);
}
\f
{
rtx pat = PATTERN (trial);
rtx src = SET_SRC (pat);
+ bool src_is_freg = false;
+ rtx src_reg;
+
+ /* Since we now can do moves between float and integer registers when
+ VIS3 is enabled, we have to catch this case. We can allow such
+ moves when doing a 'return' however. */
+ src_reg = src;
+ if (GET_CODE (src_reg) == SUBREG)
+ src_reg = SUBREG_REG (src_reg);
+ if (GET_CODE (src_reg) == REG
+ && SPARC_FP_REG_P (REGNO (src_reg)))
+ src_is_freg = true;
/* The 'restore src,%g0,dest' pattern for word mode and below. */
if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
- && arith_operand (src, GET_MODE (src)))
+ && arith_operand (src, GET_MODE (src))
+ && ! src_is_freg)
{
if (TARGET_ARCH64)
return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
/* The 'restore src,%g0,dest' pattern for double-word mode. */
else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
- && arith_double_operand (src, GET_MODE (src)))
+ && arith_double_operand (src, GET_MODE (src))
+ && ! src_is_freg)
return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
/* The 'restore src,%g0,dest' pattern for float if no FPU. */
int
eligible_for_return_delay (rtx trial)
{
+ int regno;
rtx pat;
- if (GET_CODE (trial) != INSN || GET_CODE (PATTERN (trial)) != SET)
+ if (GET_CODE (trial) != INSN)
return 0;
if (get_attr_length (trial) != 1)
get_attr_in_uncond_branch_delay (trial) == IN_UNCOND_BRANCH_DELAY_TRUE;
pat = PATTERN (trial);
+ if (GET_CODE (pat) == PARALLEL)
+ {
+ int i;
+
+ if (! TARGET_V9)
+ return 0;
+ for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
+ {
+ rtx expr = XVECEXP (pat, 0, i);
+ if (GET_CODE (expr) != SET)
+ return 0;
+ if (GET_CODE (SET_DEST (expr)) != REG)
+ return 0;
+ regno = REGNO (SET_DEST (expr));
+ if (regno >= 8 && regno < 24)
+ return 0;
+ }
+ return !epilogue_renumber (&pat, 1)
+ && (get_attr_in_uncond_branch_delay (trial)
+ == IN_UNCOND_BRANCH_DELAY_TRUE);
+ }
+
+ if (GET_CODE (pat) != SET)
+ return 0;
+
+ if (GET_CODE (SET_DEST (pat)) != REG)
+ return 0;
+
+ regno = REGNO (SET_DEST (pat));
/* Otherwise, only operations which can be done in tandem with
a `restore' or `return' insn can go into the delay slot. */
- if (GET_CODE (SET_DEST (pat)) != REG
- || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24))
+ if (regno >= 8 && regno < 24)
return 0;
/* If this instruction sets up floating point register and we have a return
instruction, it can probably go in. But restore will not work
with FP_REGS. */
- if (REGNO (SET_DEST (pat)) >= 32)
+ if (! SPARC_INT_REG_P (regno))
return (TARGET_V9
&& !epilogue_renumber (&pat, 1)
&& get_attr_in_uncond_branch_delay (trial)
a `restore' insn can go into the delay slot. */
if (GET_CODE (SET_DEST (pat)) != REG
|| (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
- || REGNO (SET_DEST (pat)) >= 32)
+ || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
return 0;
/* If it mentions %o7, it can't go in, because sibcall will clobber it
return eligible_for_restore_insn (trial, false);
}
-
-int
-short_branch (int uid1, int uid2)
-{
- int delta = INSN_ADDRESSES (uid1) - INSN_ADDRESSES (uid2);
-
- /* Leave a few words of "slop". */
- if (delta >= -1023 && delta <= 1022)
- return 1;
-
- return 0;
-}
-
-/* Return nonzero if REG is not used after INSN.
- We assume REG is a reload reg, and therefore does
- not live past labels or calls or jumps. */
-int
-reg_unused_after (rtx reg, rtx insn)
-{
- enum rtx_code code, prev_code = UNKNOWN;
-
- while ((insn = NEXT_INSN (insn)))
- {
- if (prev_code == CALL_INSN && call_used_regs[REGNO (reg)])
- return 1;
-
- code = GET_CODE (insn);
- if (GET_CODE (insn) == CODE_LABEL)
- return 1;
-
- if (INSN_P (insn))
- {
- rtx set = single_set (insn);
- int in_src = set && reg_overlap_mentioned_p (reg, SET_SRC (set));
- if (set && in_src)
- return 0;
- if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
- return 1;
- if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
- return 0;
- }
- prev_code = code;
- }
- return 1;
-}
\f
/* Determine if it's legal to put X into the constant pool. This
is not possible if X contains the address of a symbol that is
return true;
/* Floating point constants are generally not ok.
- The only exception is 0.0 in VIS. */
+ The only exceptions are 0.0 and all-ones in VIS. */
if (TARGET_VIS
&& SCALAR_FLOAT_MODE_P (mode)
- && const_zero_operand (x, mode))
+ && (const_zero_operand (x, mode)
+ || const_all_ones_operand (x, mode)))
return true;
return false;
case CONST_VECTOR:
/* Vector constants are generally not ok.
- The only exception is 0 in VIS. */
+ The only exceptions are 0 and -1 in VIS. */
if (TARGET_VIS
- && const_zero_operand (x, mode))
+ && (const_zero_operand (x, mode)
+ || const_all_ones_operand (x, mode)))
return true;
return false;
REG+REG address, then only one of them gets converted to an
offsettable address. */
if (mode == TFmode
- && ! (TARGET_FPU && TARGET_ARCH64 && TARGET_HARD_QUAD))
+ && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
return 0;
/* We prohibit REG + REG on ARCH32 if not optimizing for
}
else
{
- if ((REGNO (rs1) >= 32
+ if ((! SPARC_INT_REG_P (REGNO (rs1))
&& REGNO (rs1) != FRAME_POINTER_REGNUM
&& REGNO (rs1) < FIRST_PSEUDO_REGISTER)
|| (rs2
- && (REGNO (rs2) >= 32
+ && (! SPARC_INT_REG_P (REGNO (rs2))
&& REGNO (rs2) != FRAME_POINTER_REGNUM
&& REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
return 0;
if (reg == 0)
{
- gcc_assert (! reload_in_progress && ! reload_completed);
+ gcc_assert (can_create_pseudo_p ());
reg = gen_reg_rtx (Pmode);
}
/* If not during reload, allocate another temp reg here for loading
in the address, so that these instructions can be optimized
properly. */
- rtx temp_reg = ((reload_in_progress || reload_completed)
+ rtx temp_reg = (! can_create_pseudo_p ()
? reg : gen_reg_rtx (Pmode));
/* Must put the SYMBOL_REF inside an UNSPEC here so that cse
if (reg == 0)
{
- gcc_assert (! reload_in_progress && ! reload_completed);
+ gcc_assert (can_create_pseudo_p ());
reg = gen_reg_rtx (Pmode);
}
{
if (SMALL_INT (offset))
return plus_constant (base, INTVAL (offset));
- else if (! reload_in_progress && ! reload_completed)
+ else if (can_create_pseudo_p ())
offset = force_reg (Pmode, offset);
else
/* If we reach here, then something is seriously wrong. */
/* %fcc[0123] */
CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
- /* %icc */
- CC_MODES
+ /* %icc, %sfp, %gsr */
+ CC_MODES, 0, D_MODES
};
static const int hard_64bit_mode_classes[] = {
/* %fcc[0123] */
CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
- /* %icc */
- CC_MODES
+ /* %icc, %sfp, %gsr */
+ CC_MODES, 0, D_MODES
};
int sparc_mode_class [NUM_MACHINE_MODES];
if (reg0 && reg1)
{
- mode = i < 32 ? DImode : DFmode;
+ mode = SPARC_INT_REG_P (i) ? DImode : DFmode;
regno = i;
}
else if (reg0)
{
- mode = i < 32 ? SImode : SFmode;
+ mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
regno = i;
}
else if (reg1)
{
- mode = i < 32 ? SImode : SFmode;
+ mode = SPARC_INT_REG_P (i) ? SImode : SFmode;
regno = i + 1;
offset += 4;
}
save_local_or_in_reg_p, action, SORR_ADVANCE);
}
-/* Generate a save_register_window insn. */
+/* Emit a window_save insn. */
static rtx
-emit_save_register_window (rtx increment)
+emit_window_save (rtx increment)
{
- rtx insn;
-
- insn = emit_insn (gen_save_register_window_1 (increment));
+ rtx insn = emit_insn (gen_window_save (increment));
RTX_FRAME_RELATED_P (insn) = 1;
/* The incoming return address (%o7) is saved in %i7. */
else if (size <= 8192)
{
insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
- /* %sp is still the CFA register. */
RTX_FRAME_RELATED_P (insn) = 1;
+
+ /* %sp is still the CFA register. */
insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
}
else
rtx size_int_rtx = GEN_INT (-size);
if (size <= 4096)
- emit_save_register_window (size_int_rtx);
+ emit_window_save (size_int_rtx);
else if (size <= 8192)
{
- emit_save_register_window (GEN_INT (-4096));
+ emit_window_save (GEN_INT (-4096));
+
/* %sp is not the CFA register anymore. */
emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
+
+ /* Make sure no %fp-based store is issued until after the frame is
+ established. The offset between the frame pointer and the stack
+ pointer is calculated relative to the value of the stack pointer
+ at the end of the function prologue, and moving instructions that
+ access the stack via the frame pointer between the instructions
+ that decrement the stack pointer could result in accessing the
+ register window save area, which is volatile. */
+ emit_insn (gen_frame_blockage ());
}
else
{
rtx size_rtx = gen_rtx_REG (Pmode, 1);
emit_move_insn (size_rtx, size_int_rtx);
- emit_save_register_window (size_rtx);
+ emit_window_save (size_rtx);
}
}
incr = valist;
if (align)
{
- incr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
- size_int (align - 1));
+ incr = fold_build_pointer_plus_hwi (incr, align - 1);
incr = fold_convert (sizetype, incr);
incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
size_int (-align));
addr = incr;
if (BYTES_BIG_ENDIAN && size < rsize)
- addr = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr,
- size_int (rsize - size));
+ addr = fold_build_pointer_plus_hwi (incr, rsize - size);
if (indirect)
{
{
tree tmp = create_tmp_var (type, "va_arg_tmp");
tree dest_addr = build_fold_addr_expr (tmp);
- tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+ tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
3, dest_addr, addr, size_int (rsize));
TREE_ADDRESSABLE (tmp) = 1;
gimplify_and_add (copy, pre_p);
else
addr = fold_convert (ptrtype, addr);
- incr
- = fold_build2 (POINTER_PLUS_EXPR, ptr_type_node, incr, size_int (rsize));
+ incr = fold_build_pointer_plus_hwi (incr, rsize);
gimplify_assign (valist, incr, post_p);
return build_va_arg_indirect_ref (addr);
return 1;
}
+/* Like sparc_splitdi_legitimate but for REG <--> REG moves. */
+
+int
+sparc_split_regreg_legitimate (rtx reg1, rtx reg2)
+{
+ int regno1, regno2;
+
+ if (GET_CODE (reg1) == SUBREG)
+ reg1 = SUBREG_REG (reg1);
+ if (GET_CODE (reg1) != REG)
+ return 0;
+ regno1 = REGNO (reg1);
+
+ if (GET_CODE (reg2) == SUBREG)
+ reg2 = SUBREG_REG (reg2);
+ if (GET_CODE (reg2) != REG)
+ return 0;
+ regno2 = REGNO (reg2);
+
+ if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
+ return 1;
+
+ if (TARGET_VIS3)
+ {
+ if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
+ || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
+ return 1;
+ }
+
+ return 0;
+}
+
/* Return 1 if x and y are some kind of REG and they refer to
different hard registers. This test is guaranteed to be
run after reload. */
return 0;
/* Integer ldd is deprecated in SPARC V9 */
- if (TARGET_V9 && REGNO (reg1) < 32)
+ if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
return 0;
return (REGNO (reg1) == REGNO (reg2) - 1);
if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
return 0;
- if ((reload_in_progress || reload_completed)
+ if (! can_create_pseudo_p ()
&& !strict_memory_address_p (Pmode, XEXP (op, 0)))
return 0;
}
}
return;
- /* These are used by the conditional move instructions. */
- case 'c' :
+ /* This is used by the conditional move instructions. */
case 'C':
{
enum rtx_code rc = GET_CODE (x);
- if (code == 'c')
- {
- enum machine_mode mode = GET_MODE (XEXP (x, 0));
- if (mode == CCFPmode || mode == CCFPEmode)
- rc = reverse_condition_maybe_unordered (GET_CODE (x));
- else
- rc = reverse_condition (GET_CODE (x));
- }
switch (rc)
{
case NE: fputs ("ne", file); break;
case UNGT: fputs ("ug", file); break;
case UNGE: fputs ("uge", file); break;
case UNEQ: fputs ("ue", file); break;
- default: output_operand_lossage (code == 'c'
- ? "invalid %%c operand"
- : "invalid %%C operand");
+ default: output_operand_lossage ("invalid %%C operand");
}
return;
}
- /* These are used by the movr instruction pattern. */
- case 'd':
+ /* This is used by the movr instruction pattern. */
case 'D':
{
- enum rtx_code rc = (code == 'd'
- ? reverse_condition (GET_CODE (x))
- : GET_CODE (x));
+ enum rtx_code rc = GET_CODE (x);
switch (rc)
{
case NE: fputs ("ne", file); break;
case LT: fputs ("lz", file); break;
case LE: fputs ("lez", file); break;
case GT: fputs ("gz", file); break;
- default: output_operand_lossage (code == 'd'
- ? "invalid %%d operand"
- : "invalid %%D operand");
+ default: output_operand_lossage ("invalid %%D operand");
}
return;
}
if (sparc_cpu != PROCESSOR_ULTRASPARC
&& sparc_cpu != PROCESSOR_ULTRASPARC3
&& sparc_cpu != PROCESSOR_NIAGARA
- && sparc_cpu != PROCESSOR_NIAGARA2)
+ && sparc_cpu != PROCESSOR_NIAGARA2
+ && sparc_cpu != PROCESSOR_NIAGARA3
+ && sparc_cpu != PROCESSOR_NIAGARA4)
emit_insn (gen_flush (validize_mem (adjust_address (m_tramp, SImode, 8))));
/* Call __enable_execute_stack after writing onto the stack to make sure
if (sparc_cpu != PROCESSOR_ULTRASPARC
&& sparc_cpu != PROCESSOR_ULTRASPARC3
&& sparc_cpu != PROCESSOR_NIAGARA
- && sparc_cpu != PROCESSOR_NIAGARA2)
+ && sparc_cpu != PROCESSOR_NIAGARA2
+ && sparc_cpu != PROCESSOR_NIAGARA3
+ && sparc_cpu != PROCESSOR_NIAGARA4)
emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
/* Call __enable_execute_stack after writing onto the stack to make sure
sparc_use_sched_lookahead (void)
{
if (sparc_cpu == PROCESSOR_NIAGARA
- || sparc_cpu == PROCESSOR_NIAGARA2)
+ || sparc_cpu == PROCESSOR_NIAGARA2
+ || sparc_cpu == PROCESSOR_NIAGARA3
+ || sparc_cpu == PROCESSOR_NIAGARA4)
return 0;
if (sparc_cpu == PROCESSOR_ULTRASPARC
|| sparc_cpu == PROCESSOR_ULTRASPARC3)
{
case PROCESSOR_NIAGARA:
case PROCESSOR_NIAGARA2:
+ case PROCESSOR_NIAGARA3:
+ case PROCESSOR_NIAGARA4:
default:
return 1;
case PROCESSOR_V9:
return 0;
}
-/* Returns assembly code to perform a DImode shift using
- a 64-bit global or out register on SPARC-V8+. */
+/* Output a wide shift instruction in V8+ mode. INSN is the instruction,
+ OPERANDS are its operands and OPCODE is the mnemonic to be used. */
+
const char *
-output_v8plus_shift (rtx *operands, rtx insn, const char *opcode)
+output_v8plus_shift (rtx insn, rtx *operands, const char *opcode)
{
static char asm_code[60];
output_asm_insn ("or\t%L1, %3, %3", operands);
}
- strcpy(asm_code, opcode);
+ strcpy (asm_code, opcode);
if (which_alternative != 2)
return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
else
- return strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
+ return
+ strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
}
\f
/* Output rtl to increment the profiler label LABELNO
}
}
\f
-#define def_builtin(NAME, CODE, TYPE) \
- add_builtin_function((NAME), (TYPE), (CODE), BUILT_IN_MD, NULL, \
- NULL_TREE)
+static tree
+def_builtin (const char *name, int code, tree type)
+{
+ return add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
+ NULL_TREE);
+}
+
+static tree
+def_builtin_const (const char *name, int code, tree type)
+{
+ tree t = def_builtin (name, code, type);
+
+ if (t)
+ TREE_READONLY (t) = 1;
+
+ return t;
+}
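+
+/* Marking the decl TREE_READONLY advertises the builtin as a 'const'
+ function, so the optimizers may CSE calls with identical arguments;
+ this suits the pure arithmetic VIS operations registered below. */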
/* Implement the TARGET_INIT_BUILTINS target hook.
Create builtin functions for special SPARC instructions. */
tree v4hi = build_vector_type (intHI_type_node, 4);
tree v2hi = build_vector_type (intHI_type_node, 2);
tree v2si = build_vector_type (intSI_type_node, 2);
+ tree v1si = build_vector_type (intSI_type_node, 1);
tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
+ tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
+ tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
v8qi, v8qi,
intDI_type_node, 0);
+ tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
+ v8qi, v8qi, 0);
+ tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
+ v8qi, v8qi, 0);
tree di_ftype_di_di = build_function_type_list (intDI_type_node,
intDI_type_node,
intDI_type_node, 0);
+ tree si_ftype_si_si = build_function_type_list (intSI_type_node,
+ intSI_type_node,
+ intSI_type_node, 0);
tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
ptr_type_node,
intSI_type_node, 0);
tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
ptr_type_node,
intDI_type_node, 0);
+ tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
+ ptr_type_node,
+ ptr_type_node, 0);
+ tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
+ ptr_type_node,
+ ptr_type_node, 0);
+ tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
+ v4hi, v4hi, 0);
+ tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
+ v2si, v2si, 0);
+ tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
+ v4hi, v4hi, 0);
+ tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
+ v2si, v2si, 0);
+ tree void_ftype_di = build_function_type_list (void_type_node,
+ intDI_type_node, 0);
+ tree di_ftype_void = build_function_type_list (intDI_type_node,
+ void_type_node, 0);
+ tree void_ftype_si = build_function_type_list (void_type_node,
+ intSI_type_node, 0);
+ tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
+ float_type_node,
+ float_type_node, 0);
+ tree df_ftype_df_df = build_function_type_list (double_type_node,
+ double_type_node,
+ double_type_node, 0);
/* Packing and expanding vectors. */
- def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis, v4qi_ftype_v4hi);
+ def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
+ v4qi_ftype_v4hi);
def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
v8qi_ftype_v2si_v8qi);
def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
v2hi_ftype_v2si);
- def_builtin ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis, v4hi_ftype_v4qi);
- def_builtin ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
- v8qi_ftype_v4qi_v4qi);
+ def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
+ v4hi_ftype_v4qi);
+ def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
+ v8qi_ftype_v4qi_v4qi);
/* Multiplications. */
- def_builtin ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
- v4hi_ftype_v4qi_v4hi);
- def_builtin ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
- v4hi_ftype_v4qi_v2hi);
- def_builtin ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
- v4hi_ftype_v4qi_v2hi);
- def_builtin ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
- v4hi_ftype_v8qi_v4hi);
- def_builtin ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
- v4hi_ftype_v8qi_v4hi);
- def_builtin ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
- v2si_ftype_v4qi_v2hi);
- def_builtin ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
- v2si_ftype_v4qi_v2hi);
+ def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
+ v4hi_ftype_v4qi_v4hi);
+ def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
+ v4hi_ftype_v4qi_v2hi);
+ def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
+ v4hi_ftype_v4qi_v2hi);
+ def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
+ v4hi_ftype_v8qi_v4hi);
+ def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
+ v4hi_ftype_v8qi_v4hi);
+ def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
+ v2si_ftype_v4qi_v2hi);
+ def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
+ v2si_ftype_v4qi_v2hi);
/* Data aligning. */
def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
v8qi_ftype_v8qi_v8qi);
def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
v2si_ftype_v2si_v2si);
- def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatadi_vis,
- di_ftype_di_di);
+ def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
+ di_ftype_di_di);
+
+ def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
+ void_ftype_di);
+ def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
+ di_ftype_void);
+
if (TARGET_ARCH64)
- def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
- ptr_ftype_ptr_di);
+ {
+ def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
+ ptr_ftype_ptr_di);
+ def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
+ ptr_ftype_ptr_di);
+ }
else
- def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
- ptr_ftype_ptr_si);
+ {
+ def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
+ ptr_ftype_ptr_si);
+ def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
+ ptr_ftype_ptr_si);
+ }
/* Pixel distance. */
- def_builtin ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
- di_ftype_v8qi_v8qi_di);
+ def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
+ di_ftype_v8qi_v8qi_di);
+
+ /* Edge handling. */
+ if (TARGET_ARCH64)
+ {
+ def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
+ di_ftype_ptr_ptr);
+ if (TARGET_VIS2)
+ {
+ def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
+ di_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
+ di_ftype_ptr_ptr);
+ }
+ }
+ else
+ {
+ def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
+ si_ftype_ptr_ptr);
+ if (TARGET_VIS2)
+ {
+ def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
+ si_ftype_ptr_ptr);
+ def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
+ si_ftype_ptr_ptr);
+ }
+ }
+
+ /* Pixel compare. */
+ if (TARGET_ARCH64)
+ {
+ def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
+ di_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
+ di_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
+ di_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
+ di_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
+ di_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
+ di_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
+ di_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
+ di_ftype_v2si_v2si);
+ }
+ else
+ {
+ def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
+ si_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
+ si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
+ si_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
+ si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
+ si_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
+ si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
+ si_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
+ si_ftype_v2si_v2si);
+ }
+
+ /* Addition and subtraction. */
+ def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
+ v2hi_ftype_v2hi_v2hi);
+ def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
+ v2si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
+ v1si_ftype_v1si_v1si);
+ def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
+ v2hi_ftype_v2hi_v2hi);
+ def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
+ v2si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
+ v1si_ftype_v1si_v1si);
+
+ /* Three-dimensional array addressing. */
+ if (TARGET_ARCH64)
+ {
+ def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
+ di_ftype_di_di);
+ def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
+ di_ftype_di_di);
+ def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
+ di_ftype_di_di);
+ }
+ else
+ {
+ def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
+ si_ftype_si_si);
+ def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
+ si_ftype_si_si);
+ def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
+ si_ftype_si_si);
+ }
+
+ if (TARGET_VIS2)
+ {
+ /* Byte mask and shuffle. */
+ if (TARGET_ARCH64)
+ def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
+ di_ftype_di_di);
+ else
+ def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
+ si_ftype_si_si);
+ def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
+ v8qi_ftype_v8qi_v8qi);
+ def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
+ v2si_ftype_v2si_v2si);
+ def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
+ di_ftype_di_di);
+ }
+
+ if (TARGET_VIS3)
+ {
+ if (TARGET_ARCH64)
+ {
+ def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
+ void_ftype_di);
+ def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
+ void_ftype_di);
+ def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
+ void_ftype_di);
+ }
+ else
+ {
+ def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
+ void_ftype_si);
+ def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
+ void_ftype_si);
+ def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
+ void_ftype_si);
+ }
+
+ def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
+ v4hi_ftype_v4hi_v4hi);
+
+ def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
+ v2si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
+ v2si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
+ v2si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
+ v2si_ftype_v2si_v2si);
+
+ if (TARGET_ARCH64)
+ def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
+ di_ftype_v8qi_v8qi);
+ else
+ def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
+ si_ftype_v8qi_v8qi);
+
+ def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
+ di_ftype_di_di);
+ def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
+ di_ftype_di_di);
+
+ def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
+ v2hi_ftype_v2hi_v2hi);
+ def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
+ v4hi_ftype_v4hi_v4hi);
+ def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
+ v2hi_ftype_v2hi_v2hi);
+ def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
+ v2si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
+ v1si_ftype_v1si_v1si);
+ def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
+ v2si_ftype_v2si_v2si);
+ def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
+ v1si_ftype_v1si_v1si);
+
+ if (TARGET_ARCH64)
+ {
+ def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
+ di_ftype_v8qi_v8qi);
+ def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
+ di_ftype_v8qi_v8qi);
+ def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
+ di_ftype_v8qi_v8qi);
+ def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
+ di_ftype_v8qi_v8qi);
+ }
+ else
+ {
+ def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
+ si_ftype_v8qi_v8qi);
+ def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
+ si_ftype_v8qi_v8qi);
+ def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
+ si_ftype_v8qi_v8qi);
+ def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
+ si_ftype_v8qi_v8qi);
+ }
+
+ def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
+ sf_ftype_sf_sf);
+ def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
+ df_ftype_df_df);
+ def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
+ sf_ftype_sf_sf);
+ def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
+ df_ftype_df_df);
+ def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
+ sf_ftype_sf_sf);
+ def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
+ df_ftype_df_df);
+
+ def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
+ di_ftype_di_di);
+ def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
+ di_ftype_di_di);
+ def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
+ di_ftype_di_di);
+ }
}
/* Handle TARGET_EXPAND_BUILTIN target hook.
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int icode = DECL_FUNCTION_CODE (fndecl);
rtx pat, op[4];
- enum machine_mode mode[4];
int arg_count = 0;
+ bool nonvoid;
- mode[0] = insn_data[icode].operand[0].mode;
- if (!target
- || GET_MODE (target) != mode[0]
- || ! (*insn_data[icode].operand[0].predicate) (target, mode[0]))
- op[0] = gen_reg_rtx (mode[0]);
- else
- op[0] = target;
+ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+ if (nonvoid)
+ {
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
+ op[0] = gen_reg_rtx (tmode);
+ else
+ op[0] = target;
+ }
FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
{
+ const struct insn_operand_data *insn_op;
+ int idx;
+
+ if (arg == error_mark_node)
+ return NULL_RTX;
+
arg_count++;
- mode[arg_count] = insn_data[icode].operand[arg_count].mode;
+ idx = arg_count - !nonvoid;
+ insn_op = &insn_data[icode].operand[idx];
op[arg_count] = expand_normal (arg);
- if (! (*insn_data[icode].operand[arg_count].predicate) (op[arg_count],
- mode[arg_count]))
- op[arg_count] = copy_to_mode_reg (mode[arg_count], op[arg_count]);
+ if (insn_op->mode == V1DImode
+ && GET_MODE (op[arg_count]) == DImode)
+ op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
+ else if (insn_op->mode == V1SImode
+ && GET_MODE (op[arg_count]) == SImode)
+ op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
+
+ if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
+ insn_op->mode))
+ op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
}
switch (arg_count)
{
+ case 0:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
case 1:
- pat = GEN_FCN (icode) (op[0], op[1]);
+ if (nonvoid)
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ else
+ pat = GEN_FCN (icode) (op[1]);
break;
case 2:
pat = GEN_FCN (icode) (op[0], op[1], op[2]);
emit_insn (pat);
- return op[0];
+ if (nonvoid)
+ return op[0];
+ else
+ return const0_rtx;
}
static int
tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
enum insn_code icode = (enum insn_code) DECL_FUNCTION_CODE (fndecl);
- if (ignore
- && icode != CODE_FOR_alignaddrsi_vis
- && icode != CODE_FOR_alignaddrdi_vis)
- return build_zero_cst (rtype);
+ if (ignore)
+ {
+ /* Note that a switch statement instead of the sequence of tests would
+ be incorrect as many of the CODE_FOR values could be CODE_FOR_nothing
+ and that would yield multiple alternatives with identical values. */
+ if (icode == CODE_FOR_alignaddrsi_vis
+ || icode == CODE_FOR_alignaddrdi_vis
+ || icode == CODE_FOR_wrgsr_vis
+ || icode == CODE_FOR_bmasksi_vis
+ || icode == CODE_FOR_bmaskdi_vis
+ || icode == CODE_FOR_cmask8si_vis
+ || icode == CODE_FOR_cmask8di_vis
+ || icode == CODE_FOR_cmask16si_vis
+ || icode == CODE_FOR_cmask16di_vis
+ || icode == CODE_FOR_cmask32si_vis
+ || icode == CODE_FOR_cmask32di_vis)
+ ;
+ else
+ return build_zero_cst (rtype);
+ }
switch (icode)
{
??? the latencies and then CSE will just use that. */
static bool
-sparc_rtx_costs (rtx x, int code, int outer_code, int *total,
- bool speed ATTRIBUTE_UNUSED)
+sparc_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
+ int *total, bool speed ATTRIBUTE_UNUSED)
{
enum machine_mode mode = GET_MODE (x);
bool float_mode_p = FLOAT_MODE_P (mode);
*total = COSTS_N_INSNS (1);
return false;
+ case FMA:
+ {
+ rtx sub;
+
+ gcc_assert (float_mode_p);
+ *total = sparc_costs->float_mul;
+
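+ /* A NEG of either the multiplicand or the addend folds into the
+ fused operation (the FMAF fnmadd/fnmsub forms), so strip it and do
+ not cost the negation separately. */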
+ sub = XEXP (x, 0);
+ if (GET_CODE (sub) == NEG)
+ sub = XEXP (sub, 0);
+ *total += rtx_cost (sub, FMA, 0, speed);
+
+ sub = XEXP (x, 2);
+ if (GET_CODE (sub) == NEG)
+ sub = XEXP (sub, 0);
+ *total += rtx_cost (sub, FMA, 2, speed);
+ return true;
+ }
+
case MULT:
if (float_mode_p)
*total = sparc_costs->float_mul;
sparc_register_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
reg_class_t from, reg_class_t to)
{
- if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
- || (general_or_i64_p (from) && FP_REG_CLASS_P (to))
- || from == FPCC_REGS
- || to == FPCC_REGS)
+ bool need_memory = false;
+
+ if (from == FPCC_REGS || to == FPCC_REGS)
+ need_memory = true;
+ else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
+ || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
+ {
+ if (TARGET_VIS3)
+ {
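+ /* VIS3 can move values directly between the integer and FP register
+ files (the movwtos/movxtod family of instructions), so no memory
+ round trip is needed for 4- and 8-byte modes. */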
+ int size = GET_MODE_SIZE (mode);
+ if (size == 8 || size == 4)
+ {
+ if (! TARGET_ARCH32 || size == 4)
+ return 4;
+ else
+ return 6;
+ }
+ }
+ need_memory = true;
+ }
+
+ if (need_memory)
{
if (sparc_cpu == PROCESSOR_ULTRASPARC
|| sparc_cpu == PROCESSOR_ULTRASPARC3
|| sparc_cpu == PROCESSOR_NIAGARA
- || sparc_cpu == PROCESSOR_NIAGARA2)
+ || sparc_cpu == PROCESSOR_NIAGARA2
+ || sparc_cpu == PROCESSOR_NIAGARA3
+ || sparc_cpu == PROCESSOR_NIAGARA4)
return 12;
return 6;
return (vcall_offset >= -32768 || ! fixed_regs[5]);
}
-/* How to allocate a 'struct machine_function'. */
+/* We use the machine specific reorg pass to enable workarounds for errata. */
-static struct machine_function *
-sparc_init_machine_status (void)
+static void
+sparc_reorg (void)
{
- return ggc_alloc_cleared_machine_function ();
-}
+ rtx insn, next;
-/* Locate some local-dynamic symbol still in use by this function
+ /* The only erratum we handle for now is that of the AT697F processor. */
+ if (!sparc_fix_at697f)
+ return;
+
+ /* We need to have the (essentially) final form of the insn stream in order
+ to properly detect the various hazards. Run delay slot scheduling. */
+ if (optimize > 0 && flag_delayed_branch)
+ dbr_schedule (get_insns ());
+
+ /* Now look for specific patterns in the insn stream. */
+ for (insn = get_insns (); insn; insn = next)
+ {
+ bool insert_nop = false;
+ rtx set;
+
+ /* Look for a single-word load into an odd-numbered FP register. */
+ if (NONJUMP_INSN_P (insn)
+ && (set = single_set (insn)) != NULL_RTX
+ && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
+ && MEM_P (SET_SRC (set))
+ && REG_P (SET_DEST (set))
+ && REGNO (SET_DEST (set)) > 31
+ && REGNO (SET_DEST (set)) % 2 != 0)
+ {
+ /* The wrong dependency is on the enclosing double register. */
+ unsigned int x = REGNO (SET_DEST (set)) - 1;
+ unsigned int src1, src2, dest;
+ int code;
+
+ /* If the insn has a delay slot, then it cannot be problematic. */
+ next = next_active_insn (insn);
+ if (NONJUMP_INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE)
+ code = -1;
+ else
+ {
+ extract_insn (next);
+ code = INSN_CODE (next);
+ }
+
+ switch (code)
+ {
+ case CODE_FOR_adddf3:
+ case CODE_FOR_subdf3:
+ case CODE_FOR_muldf3:
+ case CODE_FOR_divdf3:
+ dest = REGNO (recog_data.operand[0]);
+ src1 = REGNO (recog_data.operand[1]);
+ src2 = REGNO (recog_data.operand[2]);
+ if (src1 != src2)
+ {
+ /* Case [1-4]:
+ ld [address], %fx+1
+ FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
+ if ((src1 == x || src2 == x)
+ && (dest == src1 || dest == src2))
+ insert_nop = true;
+ }
+ else
+ {
+ /* Case 5:
+ ld [address], %fx+1
+ FPOPd %fx, %fx, %fx */
+ if (src1 == x
+ && dest == src1
+ && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
+ insert_nop = true;
+ }
+ break;
+
+ case CODE_FOR_sqrtdf2:
+ dest = REGNO (recog_data.operand[0]);
+ src1 = REGNO (recog_data.operand[1]);
+ /* Case 6:
+ ld [address], %fx+1
+ fsqrtd %fx, %fx */
+ if (src1 == x && dest == src1)
+ insert_nop = true;
+ break;
+
+ default:
+ break;
+ }
+ }
+ else
+ next = NEXT_INSN (insn);
+
+ if (insert_nop)
+ emit_insn_after (gen_nop (), insn);
+ }
+}
+
+/* How to allocate a 'struct machine_function'. */
+
+static struct machine_function *
+sparc_init_machine_status (void)
+{
+ return ggc_alloc_cleared_machine_function ();
+}
+
+/* Locate some local-dynamic symbol still in use by this function
so that we can print its name in local-dynamic base patterns. */
static const char *
if (NEED_INDICATE_EXEC_STACK)
file_end_indicate_exec_stack ();
+
+#ifdef TARGET_SOLARIS
+ solaris_file_end ();
+#endif
}
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
}
#endif
+/* Expand a membar instruction for various use cases. Both the LOAD_STORE
+ and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks
+ where bit 0 indicates that X is true, and bit 1 indicates that Y is true. */
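+
+/* For example, the compare-and-swap expander below brackets the atomic
+ operation with sparc_emit_membar_for_model (model, 3, 1) and
+ (model, 3, 2): LOAD_STORE == 3 covers both loads and stores, while
+ BEFORE_AFTER selects the barrier emitted before (1) or after (2) the
+ operation. */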
+
+void
+sparc_emit_membar_for_model (enum memmodel model,
+ int load_store, int before_after)
+{
+ /* Bits for the MEMBAR mmask field. */
+ const int LoadLoad = 1;
+ const int StoreLoad = 2;
+ const int LoadStore = 4;
+ const int StoreStore = 8;
+
+ int mm = 0, implied = 0;
+
+ switch (sparc_memory_model)
+ {
+ case SMM_SC:
+ /* Sequential Consistency. All memory transactions are immediately
+ visible in sequential execution order. No barriers needed. */
+ implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
+ break;
+
+ case SMM_TSO:
+ /* Total Store Ordering: all memory transactions with store semantics
+ are followed by an implied StoreStore. */
+ implied |= StoreStore;
+ /* FALLTHRU */
+
+ case SMM_PSO:
+ /* Partial Store Ordering: all memory transactions with load semantics
+ are followed by an implied LoadLoad | LoadStore. */
+ implied |= LoadLoad | LoadStore;
+
+ /* If we're not looking for a raw barrier (before+after), then atomic
+ operations get the benefit of being both load and store. */
+ if (load_store == 3 && before_after == 2)
+ implied |= StoreLoad | StoreStore;
+ /* FALLTHRU */
+
+ case SMM_RMO:
+ /* Relaxed Memory Ordering: no implicit bits. */
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (before_after & 1)
+ {
+ if (model == MEMMODEL_ACQUIRE
+ || model == MEMMODEL_ACQ_REL
+ || model == MEMMODEL_SEQ_CST)
+ {
+ if (load_store & 1)
+ mm |= LoadLoad | LoadStore;
+ if (load_store & 2)
+ mm |= StoreLoad | StoreStore;
+ }
+ }
+ if (before_after & 2)
+ {
+ if (model == MEMMODEL_RELEASE
+ || model == MEMMODEL_ACQ_REL
+ || model == MEMMODEL_SEQ_CST)
+ {
+ if (load_store & 1)
+ mm |= LoadLoad | StoreLoad;
+ if (load_store & 2)
+ mm |= LoadStore | StoreStore;
+ }
+ }
+
+ /* Remove the bits implied by the system memory model. */
+ mm &= ~implied;
+
+ /* For raw barriers (before+after), always emit a barrier.
+ This will become a compile-time barrier if needed. */
+ if (mm || before_after == 3)
+ emit_insn (gen_membar (GEN_INT (mm)));
+}
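+
+/* Worked example (illustrative only): under the default TSO model the
+ implied mask is LoadLoad | LoadStore | StoreStore, so a SEQ_CST store
+ (load_store == 2) requests StoreLoad | StoreStore before the store, of
+ which only StoreLoad survives, i.e. a single membar #StoreLoad is
+ emitted; the barrier after the store is subsumed entirely. */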
+
/* Expand code to perform an 8 or 16-bit compare and swap by doing 32-bit
compare and swap on the word containing the byte or half-word. */
-void
-sparc_expand_compare_and_swap_12 (rtx result, rtx mem, rtx oldval, rtx newval)
+static void
+sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
+ rtx oldval, rtx newval)
{
rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
rtx addr = gen_reg_rtx (Pmode);
set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
- val = force_reg (SImode, memsi);
+ val = copy_to_reg (memsi);
emit_insn (gen_rtx_SET (VOIDmode, off,
gen_rtx_XOR (SImode, off,
emit_insn (gen_rtx_SET (VOIDmode, newvalue,
gen_rtx_IOR (SImode, newv, val)));
- emit_insn (gen_sync_compare_and_swapsi (res, memsi, oldvalue, newvalue));
+ emit_move_insn (bool_result, const1_rtx);
+
+ emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
res)));
+ emit_move_insn (bool_result, const0_rtx);
+
cc = gen_compare_reg_1 (NE, resv, val);
emit_insn (gen_rtx_SET (VOIDmode, val, resv));
emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
}
+/* Expand code to perform a compare-and-swap. */
+
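+/* The operands follow the layout of the standard atomic_compare_and_swap
+ optab: 0 = bool success output, 1 = old-value output, 2 = memory,
+ 3 = expected value, 4 = desired value, 5 = is_weak, 6 = success memory
+ model, 7 = failure memory model. Only the success model is used here. */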
+void
+sparc_expand_compare_and_swap (rtx operands[])
+{
+ rtx bval, retval, mem, oldval, newval;
+ enum machine_mode mode;
+ enum memmodel model;
+
+ bval = operands[0];
+ retval = operands[1];
+ mem = operands[2];
+ oldval = operands[3];
+ newval = operands[4];
+ model = (enum memmodel) INTVAL (operands[6]);
+ mode = GET_MODE (mem);
+
+ sparc_emit_membar_for_model (model, 3, 1);
+
+ if (reg_overlap_mentioned_p (retval, oldval))
+ oldval = copy_to_reg (oldval);
+
+ if (mode == QImode || mode == HImode)
+ sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
+ else
+ {
+ rtx (*gen) (rtx, rtx, rtx, rtx);
+ rtx x;
+
+ if (mode == SImode)
+ gen = gen_atomic_compare_and_swapsi_1;
+ else
+ gen = gen_atomic_compare_and_swapdi_1;
+ emit_insn (gen (retval, mem, oldval, newval));
+
+ x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
+ if (x != bval)
+ convert_move (bval, x, 1);
+ }
+
+ sparc_emit_membar_for_model (model, 3, 2);
+}
+
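+/* Compute the selector for a vector permutation under VIS2. VMODE is the
+ vector mode of the operands and SEL is a register holding the selector
+ vector. The index bits of every element are compressed into the low
+ SImode word and handed to the bmask insn together with per-byte offsets.
+
+ Worked example (illustrative, not from the original patch): for V2SImode
+ with sel = { 1, 0 }, the indices are packed as 0x00010000, multiplied by
+ 0x4444 to give 0x44440000, and the bmask insn adds 0x01230123, yielding
+ the byte selectors 0x45670123, i.e. { 4, 5, 6, 7, 0, 1, 2, 3 }, which
+ swaps the two 32-bit words. */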
+void
+sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
+{
+ rtx t_1, t_2, t_3;
+
+ sel = gen_lowpart (DImode, sel);
+ switch (vmode)
+ {
+ case V2SImode:
+ /* inp = xxxxxxxAxxxxxxxB */
+ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* t_1 = ....xxxxxxxAxxx. */
+ sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
+ GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
+ t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
+ GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
+ /* sel = .......B */
+ /* t_1 = ...A.... */
+ sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
+ /* sel = ...A...B */
+ sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
+ /* sel = AAAABBBB * 4 */
+ t_1 = force_reg (SImode, GEN_INT (0x01230123));
+ /* sel = { A*4, A*4+1, A*4+2, ... } */
+ break;
+
+ case V4HImode:
+ /* inp = xxxAxxxBxxxCxxxD */
+ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* t_1 = ..xxxAxxxBxxxCxx */
+ /* t_2 = ....xxxAxxxBxxxC */
+ /* t_3 = ......xxxAxxxBxx */
+ sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
+ GEN_INT (0x07),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
+ GEN_INT (0x0700),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
+ GEN_INT (0x070000),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
+ GEN_INT (0x07000000),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* sel = .......D */
+ /* t_1 = .....C.. */
+ /* t_2 = ...B.... */
+ /* t_3 = .A...... */
+ sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
+ t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
+ sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
+ /* sel = .A.B.C.D */
+ sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
+ /* sel = AABBCCDD * 2 */
+ t_1 = force_reg (SImode, GEN_INT (0x01010101));
+ /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
+ break;
+
+ case V8QImode:
+ /* input = xAxBxCxDxExFxGxH */
+ sel = expand_simple_binop (DImode, AND, sel,
+ GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
+ | 0x0f0f0f0f),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* sel = .A.B.C.D.E.F.G.H */
+ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* t_1 = ..A.B.C.D.E.F.G. */
+ sel = expand_simple_binop (DImode, IOR, sel, t_1,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* sel = .AABBCCDDEEFFGGH */
+ sel = expand_simple_binop (DImode, AND, sel,
+ GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
+ | 0xff00ff),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* sel = ..AB..CD..EF..GH */
+ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* t_1 = ....AB..CD..EF.. */
+ sel = expand_simple_binop (DImode, IOR, sel, t_1,
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* sel = ..ABABCDCDEFEFGH */
+ sel = expand_simple_binop (DImode, AND, sel,
+ GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* sel = ....ABCD....EFGH */
+ t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
+ NULL_RTX, 1, OPTAB_DIRECT);
+ /* t_1 = ........ABCD.... */
+ sel = gen_lowpart (SImode, sel);
+ t_1 = gen_lowpart (SImode, t_1);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Always perform the final addition/merge within the bmask insn. */
+ emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
+}
+
/* Implement TARGET_FRAME_POINTER_REQUIRED. */
static bool
for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
leaf_reg_remap [regno] = regno;
}
+ if (TARGET_VIS)
+ global_regs[SPARC_GSR_REG] = 1;
}
-/* Implement TARGET_PREFERRED_RELOAD_CLASS
+/* Implement TARGET_PREFERRED_RELOAD_CLASS:
- We can't load constants into FP registers.
- We can't load FP constants into integer registers when soft-float,
static reg_class_t
sparc_preferred_reload_class (rtx x, reg_class_t rclass)
{
+ enum machine_mode mode = GET_MODE (x);
if (CONSTANT_P (x))
{
if (FP_REG_CLASS_P (rclass)
|| rclass == GENERAL_OR_FP_REGS
|| rclass == GENERAL_OR_EXTRA_FP_REGS
- || (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT && ! TARGET_FPU)
- || (GET_MODE (x) == TFmode && ! const_zero_operand (x, TFmode)))
+ || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
+ || (mode == TFmode && ! const_zero_operand (x, mode)))
return NO_REGS;
- if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
+ if (GET_MODE_CLASS (mode) == MODE_INT)
return GENERAL_REGS;
+
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
+ {
+ if (! FP_REG_CLASS_P (rclass)
+ || !(const_zero_operand (x, mode)
+ || const_all_ones_operand (x, mode)))
+ return NO_REGS;
+ }
+ }
+
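+ /* With VIS3 on 32-bit, moves between the integer registers and the
+ upper FP registers must go through the lower FP registers (see
+ sparc_secondary_reload), so steer reloads of integer values away
+ from EXTRA_FP_REGS. */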
+ if (TARGET_VIS3
+ && ! TARGET_ARCH64
+ && (rclass == EXTRA_FP_REGS
+ || rclass == GENERAL_OR_EXTRA_FP_REGS))
+ {
+ int regno = true_regnum (x);
+
+ if (SPARC_INT_REG_P (regno))
+ return (rclass == EXTRA_FP_REGS
+ ? FP_REGS : GENERAL_OR_FP_REGS);
}
return rclass;
}
+/* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
+ OPERANDS are its operands and OPCODE is the mnemonic to be used. */
+
+const char *
+output_v8plus_mult (rtx insn, rtx *operands, const char *opcode)
+{
+ char mulstr[32];
+
+ gcc_assert (! TARGET_ARCH64);
+
+ if (sparc_check_64 (operands[1], insn) <= 0)
+ output_asm_insn ("srl\t%L1, 0, %L1", operands);
+ if (which_alternative == 1)
+ output_asm_insn ("sllx\t%H1, 32, %H1", operands);
+ if (GET_CODE (operands[2]) == CONST_INT)
+ {
+ if (which_alternative == 1)
+ {
+ output_asm_insn ("or\t%L1, %H1, %H1", operands);
+ sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
+ output_asm_insn (mulstr, operands);
+ return "srlx\t%L0, 32, %H0";
+ }
+ else
+ {
+ output_asm_insn ("sllx\t%H1, 32, %3", operands);
+ output_asm_insn ("or\t%L1, %3, %3", operands);
+ sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
+ output_asm_insn (mulstr, operands);
+ output_asm_insn ("srlx\t%3, 32, %H0", operands);
+ return "mov\t%3, %L0";
+ }
+ }
+ else if (rtx_equal_p (operands[1], operands[2]))
+ {
+ if (which_alternative == 1)
+ {
+ output_asm_insn ("or\t%L1, %H1, %H1", operands);
+ sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
+ output_asm_insn (mulstr, operands);
+ return "srlx\t%L0, 32, %H0";
+ }
+ else
+ {
+ output_asm_insn ("sllx\t%H1, 32, %3", operands);
+ output_asm_insn ("or\t%L1, %3, %3", operands);
+ sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
+ output_asm_insn (mulstr, operands);
+ output_asm_insn ("srlx\t%3, 32, %H0", operands);
+ return "mov\t%3, %L0";
+ }
+ }
+ if (sparc_check_64 (operands[2], insn) <= 0)
+ output_asm_insn ("srl\t%L2, 0, %L2", operands);
+ if (which_alternative == 1)
+ {
+ output_asm_insn ("or\t%L1, %H1, %H1", operands);
+ output_asm_insn ("sllx\t%H2, 32, %L1", operands);
+ output_asm_insn ("or\t%L2, %L1, %L1", operands);
+ sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
+ output_asm_insn (mulstr, operands);
+ return "srlx\t%L0, 32, %H0";
+ }
+ else
+ {
+ output_asm_insn ("sllx\t%H1, 32, %3", operands);
+ output_asm_insn ("sllx\t%H2, 32, %4", operands);
+ output_asm_insn ("or\t%L1, %3, %3", operands);
+ output_asm_insn ("or\t%L2, %4, %4", operands);
+ sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
+ output_asm_insn (mulstr, operands);
+ output_asm_insn ("srlx\t%3, 32, %H0", operands);
+ return "mov\t%3, %L0";
+ }
+}
+
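+/* For the fully general register-pair case above, the emitted sequence is,
+ for illustration:
+
+ sllx %H1, 32, %3
+ sllx %H2, 32, %4
+ or %L1, %3, %3
+ or %L2, %4, %4
+ OPCODE %3, %4, %3
+ srlx %3, 32, %H0
+ mov %3, %L0
+
+ possibly preceded by srl instructions to zero-extend the low halves;
+ each 64-bit input is reassembled from its 32-bit halves, multiplied,
+ and the result split back into a register pair. */
+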
+/* Subroutine of sparc_expand_vector_init. Emit code to initialize
+ all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
+ and INNER_MODE are the modes describing TARGET. */
+
+static void
+vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
+ enum machine_mode inner_mode)
+{
+ rtx t1, final_insn;
+ int bmask;
+
+ t1 = gen_reg_rtx (mode);
+
+ elt = convert_modes (SImode, inner_mode, elt, true);
+ emit_move_insn (gen_lowpart (SImode, t1), elt);
+
+ switch (mode)
+ {
+ case V2SImode:
+ final_insn = gen_bshufflev2si_vis (target, t1, t1);
+ bmask = 0x45674567;
+ break;
+ case V4HImode:
+ final_insn = gen_bshufflev4hi_vis (target, t1, t1);
+ bmask = 0x67676767;
+ break;
+ case V8QImode:
+ final_insn = gen_bshufflev8qi_vis (target, t1, t1);
+ bmask = 0x77777777;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
+ force_reg (SImode, GEN_INT (bmask))));
+ emit_insn (final_insn);
+}
+
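+/* Subroutine of sparc_expand_vector_init. Emit code to initialize all
+ fields of TARGET to ELT by means of the VIS FPMERGE insn: each of the
+ three fpmerge rounds doubles the number of copies of the byte, going
+ from 1 to 2 to 4 to 8 replicas. */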
+static void
+vector_init_fpmerge (rtx target, rtx elt, enum machine_mode inner_mode)
+{
+ rtx t1, t2, t3, t3_low;
+
+ t1 = gen_reg_rtx (V4QImode);
+ elt = convert_modes (SImode, inner_mode, elt, true);
+ emit_move_insn (gen_lowpart (SImode, t1), elt);
+
+ t2 = gen_reg_rtx (V4QImode);
+ emit_move_insn (t2, t1);
+
+ t3 = gen_reg_rtx (V8QImode);
+ t3_low = gen_lowpart (V4QImode, t3);
+
+ emit_insn (gen_fpmerge_vis (t3, t1, t2));
+ emit_move_insn (t1, t3_low);
+ emit_move_insn (t2, t3_low);
+
+ emit_insn (gen_fpmerge_vis (t3, t1, t2));
+ emit_move_insn (t1, t3_low);
+ emit_move_insn (t2, t3_low);
+
+ emit_insn (gen_fpmerge_vis (gen_lowpart (V8QImode, target), t1, t2));
+}
+
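+/* Subroutine of sparc_expand_vector_init. Emit code to initialize all
+ fields of TARGET to ELT by means of the VIS FALIGNDATA insn. With the
+ alignaddr offset set to 6, each faligndata step shifts TARGET right by
+ one halfword and inserts a fresh copy of ELT at the top, so four steps
+ fill all four V4HI lanes. */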
+static void
+vector_init_faligndata (rtx target, rtx elt, enum machine_mode inner_mode)
+{
+ rtx t1 = gen_reg_rtx (V4HImode);
+
+ elt = convert_modes (SImode, inner_mode, elt, true);
+
+ emit_move_insn (gen_lowpart (SImode, t1), elt);
+
+ emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
+ force_reg (SImode, GEN_INT (6)),
+ CONST0_RTX (SImode)));
+
+ emit_insn (gen_faligndatav4hi_vis (target, t1, target));
+ emit_insn (gen_faligndatav4hi_vis (target, t1, target));
+ emit_insn (gen_faligndatav4hi_vis (target, t1, target));
+ emit_insn (gen_faligndatav4hi_vis (target, t1, target));
+}
+
+/* Emit code to initialize TARGET to values for individual fields VALS. */
+
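+/* Several strategies are tried, from cheapest to most general: a fully
+ constant vector is loaded as a CONST_VECTOR; single-element vectors and
+ vectors made of two word-sized halves are built with plain moves; an
+ 8-byte vector replicating one element uses BSHUFFLE (VIS2), FPMERGE
+ (V8QI) or FALIGNDATA (V4HI); anything else goes through a stack
+ temporary. */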
+void
+sparc_expand_vector_init (rtx target, rtx vals)
+{
+ enum machine_mode mode = GET_MODE (target);
+ enum machine_mode inner_mode = GET_MODE_INNER (mode);
+ int n_elts = GET_MODE_NUNITS (mode);
+ int i, n_var = 0;
+ bool all_same;
+ rtx mem;
+
+ all_same = true;
+ for (i = 0; i < n_elts; i++)
+ {
+ rtx x = XVECEXP (vals, 0, i);
+ if (!CONSTANT_P (x))
+ n_var++;
+
+ if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
+ all_same = false;
+ }
+
+ if (n_var == 0)
+ {
+ emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
+ return;
+ }
+
+ if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
+ {
+ if (GET_MODE_SIZE (inner_mode) == 4)
+ {
+ emit_move_insn (gen_lowpart (SImode, target),
+ gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
+ return;
+ }
+ else if (GET_MODE_SIZE (inner_mode) == 8)
+ {
+ emit_move_insn (gen_lowpart (DImode, target),
+ gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
+ return;
+ }
+ }
+ else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
+ && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
+ {
+ emit_move_insn (gen_highpart (word_mode, target),
+ gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
+ emit_move_insn (gen_lowpart (word_mode, target),
+ gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
+ return;
+ }
+
+ if (all_same && GET_MODE_SIZE (mode) == 8)
+ {
+ if (TARGET_VIS2)
+ {
+ vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
+ return;
+ }
+ if (mode == V8QImode)
+ {
+ vector_init_fpmerge (target, XVECEXP (vals, 0, 0), inner_mode);
+ return;
+ }
+ if (mode == V4HImode)
+ {
+ vector_init_faligndata (target, XVECEXP (vals, 0, 0), inner_mode);
+ return;
+ }
+ }
+
+ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), 0);
+ for (i = 0; i < n_elts; i++)
+ emit_move_insn (adjust_address_nv (mem, inner_mode,
+ i * GET_MODE_SIZE (inner_mode)),
+ XVECEXP (vals, 0, i));
+ emit_move_insn (target, mem);
+}
+
+/* Implement TARGET_SECONDARY_RELOAD. */
+
+static reg_class_t
+sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
+ enum machine_mode mode, secondary_reload_info *sri)
+{
+ enum reg_class rclass = (enum reg_class) rclass_i;
+
+ sri->icode = CODE_FOR_nothing;
+ sri->extra_cost = 0;
+
+ /* We need a temporary when loading/storing a HImode/QImode value
+ between memory and the FPU registers. This can happen when combine puts
+ a paradoxical subreg in a float/fix conversion insn. */
+ if (FP_REG_CLASS_P (rclass)
+ && (mode == HImode || mode == QImode)
+ && (GET_CODE (x) == MEM
+ || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+ && true_regnum (x) == -1)))
+ return GENERAL_REGS;
+
+ /* On 32-bit we need a temporary when loading/storing a DFmode value
+ between unaligned memory and the upper FPU registers. */
+ if (TARGET_ARCH32
+ && rclass == EXTRA_FP_REGS
+ && mode == DFmode
+ && GET_CODE (x) == MEM
+ && ! mem_min_alignment (x, 8))
+ return FP_REGS;
+
+ if (((TARGET_CM_MEDANY
+ && symbolic_operand (x, mode))
+ || (TARGET_CM_EMBMEDANY
+ && text_segment_operand (x, mode)))
+ && ! flag_pic)
+ {
+ if (in_p)
+ sri->icode = direct_optab_handler (reload_in_optab, mode);
+ else
+ sri->icode = direct_optab_handler (reload_out_optab, mode);
+ return NO_REGS;
+ }
+
+ if (TARGET_VIS3 && TARGET_ARCH32)
+ {
+ int regno = true_regnum (x);
+
+ /* When using VIS3 fp<-->int register moves, on 32-bit we have
+ to move 8-byte values in 4-byte pieces. This only works via
+ FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
+ move between EXTRA_FP_REGS and GENERAL_REGS, we will need
+ an FP_REGS intermediate move. */
+ if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
+ || ((general_or_i64_p (rclass)
+ || rclass == GENERAL_OR_FP_REGS)
+ && SPARC_FP_REG_P (regno)))
+ {
+ sri->extra_cost = 2;
+ return FP_REGS;
+ }
+ }
+
+ return NO_REGS;
+}
+
+/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
+ OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
+
+bool
+sparc_expand_conditional_move (enum machine_mode mode, rtx *operands)
+{
+ enum rtx_code rc = GET_CODE (operands[1]);
+ enum machine_mode cmp_mode;
+ rtx cc_reg, dst, cmp;
+
+ cmp = operands[1];
+ if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
+ return false;
+
+ if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
+ cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
+
+ cmp_mode = GET_MODE (XEXP (cmp, 0));
+ rc = GET_CODE (cmp);
+
+ dst = operands[0];
+ if (! rtx_equal_p (operands[2], dst)
+ && ! rtx_equal_p (operands[3], dst))
+ {
+ if (reg_overlap_mentioned_p (dst, cmp))
+ dst = gen_reg_rtx (mode);
+
+ emit_move_insn (dst, operands[3]);
+ }
+ else if (operands[2] == dst)
+ {
+ operands[2] = operands[3];
+
+ if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
+ rc = reverse_condition_maybe_unordered (rc);
+ else
+ rc = reverse_condition (rc);
+ }
+
+ if (XEXP (cmp, 1) == const0_rtx
+ && GET_CODE (XEXP (cmp, 0)) == REG
+ && cmp_mode == DImode
+ && v9_regcmp_p (rc))
+ cc_reg = XEXP (cmp, 0);
+ else
+ cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
+
+ cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
+
+ emit_insn (gen_rtx_SET (VOIDmode, dst,
+ gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
+
+ if (dst != operands[0])
+ emit_move_insn (operands[0], dst);
+
+ return true;
+}
+
+/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
+ into OPERANDS[0] in MODE, depending on the outcome of the comparison of
+ OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
+ FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
+ code to be used for the condition mask. */
+
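+/* Sketch of the emitted sequence: the vector-compare unspec (FCODE) sets
+ one bit per lane in an integer register, the CMASK unspec (CCODE) turns
+ that into the GSR mask field, and the final BSHUFFLE picks each lane of
+ OPERANDS[0] from OPERANDS[1] or OPERANDS[2]. */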
+void
+sparc_expand_vcond (enum machine_mode mode, rtx *operands, int ccode, int fcode)
+{
+ rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
+ enum rtx_code code = GET_CODE (operands[3]);
+
+ mask = gen_reg_rtx (Pmode);
+ cop0 = operands[4];
+ cop1 = operands[5];
+ if (code == LT || code == GE)
+ {
+ rtx t;
+
+ code = swap_condition (code);
+ t = cop0; cop0 = cop1; cop1 = t;
+ }
+
+ gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
+
+ fcmp = gen_rtx_UNSPEC (Pmode,
+ gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
+ fcode);
+
+ cmask = gen_rtx_UNSPEC (DImode,
+ gen_rtvec (2, mask, gsr),
+ ccode);
+
+ bshuf = gen_rtx_UNSPEC (mode,
+ gen_rtvec (3, operands[1], operands[2], gsr),
+ UNSPEC_BSHUFFLE);
+
+ emit_insn (gen_rtx_SET (VOIDmode, mask, fcmp));
+ emit_insn (gen_rtx_SET (VOIDmode, gsr, cmask));
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], bshuf));
+}
+
+/* On sparc, any mode which naturally allocates into the float
+ registers should return 4 here. */
+
+unsigned int
+sparc_regmode_natural_size (enum machine_mode mode)
+{
+ int size = UNITS_PER_WORD;
+
+ if (TARGET_ARCH64)
+ {
+ enum mode_class mclass = GET_MODE_CLASS (mode);
+
+ if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
+ size = 4;
+ }
+
+ return size;
+}
+
+/* Return TRUE if it is a good idea to tie two pseudo registers
+ when one has mode MODE1 and one has mode MODE2.
+ If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
+ for any hard reg, then this must be FALSE for correct output.
+
+ For V9 we have to deal with the fact that only the lower 32 floating
+ point registers are 32-bit addressable. */
+
+bool
+sparc_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
+{
+ enum mode_class mclass1, mclass2;
+ unsigned short size1, size2;
+
+ if (mode1 == mode2)
+ return true;
+
+ mclass1 = GET_MODE_CLASS (mode1);
+ mclass2 = GET_MODE_CLASS (mode2);
+ if (mclass1 != mclass2)
+ return false;
+
+ if (! TARGET_V9)
+ return true;
+
+ /* Classes are the same and we are V9 so we have to deal with upper
+ vs. lower floating point registers. If one of the modes is a
+ 4-byte mode, and the other is not, we have to mark them as not
+ tieable because only the lower 32 floating point registers are
+ addressable 32 bits at a time.
+
+ We can't just test explicitly for SFmode, otherwise we won't
+ cover the vector mode cases properly. */
+
+ if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
+ return true;
+
+ size1 = GET_MODE_SIZE (mode1);
+ size2 = GET_MODE_SIZE (mode2);
+ if ((size1 > 4 && size2 == 4)
+ || (size2 > 4 && size1 == 4))
+ return false;
+
+ return true;
+}
+
#include "gt-sparc.h"