/* Subroutines used for code generation on IBM RS/6000.
Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999,
- 2000, 2001, 2002, 2003, 2004, 2005, 2006
+ 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
Free Software Foundation, Inc.
Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)
{ (const char *)0, "-mtune=", 1, 0 },
};
+static GTY(()) bool rs6000_cell_dont_microcode;
+
/* Always emit branch hint bits. */
static GTY(()) bool rs6000_always_hint;
/* Schedule instructions for group formation. */
static GTY(()) bool rs6000_sched_groups;
+/* Align branch targets. */
+static GTY(()) bool rs6000_align_branch_targets;
+
/* Support for -msched-costly-dep option. */
const char *rs6000_sched_costly_dep_str;
enum rs6000_dependence_cost rs6000_sched_costly_dep;
int toc_initialized;
char toc_label_name[10];
+/* Cached value of rs6000_variable_issue. This is cached in
+ rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */
+static short cached_can_issue_more;
+
static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *read_only_private_data_section;
COSTS_N_INSNS (21), /* ddiv */
};
+/* Instruction costs on Cell processor. */
+/* COSTS_N_INSNS (1) ~ one add. */
+static const
+struct processor_costs ppccell_cost = {
+ COSTS_N_INSNS (9/2)+2, /* mulsi */
+ COSTS_N_INSNS (6/2), /* mulsi_const */
+ COSTS_N_INSNS (6/2), /* mulsi_const9 */
+ COSTS_N_INSNS (15/2)+2, /* muldi */
+ COSTS_N_INSNS (38/2), /* divsi */
+ COSTS_N_INSNS (70/2), /* divdi */
+ COSTS_N_INSNS (10/2), /* fp */
+ COSTS_N_INSNS (10/2), /* dmul */
+ COSTS_N_INSNS (74/2), /* sdiv */
+ COSTS_N_INSNS (74/2), /* ddiv */
+};
+
/* Instruction costs on PPC750 and PPC7400 processors. */
static const
struct processor_costs ppc750_cost = {
COSTS_N_INSNS (17), /* ddiv */
};
+/* Instruction costs on POWER6 processors. */
+static const
+struct processor_costs power6_cost = {
+ COSTS_N_INSNS (8), /* mulsi */
+ COSTS_N_INSNS (8), /* mulsi_const */
+ COSTS_N_INSNS (8), /* mulsi_const9 */
+ COSTS_N_INSNS (8), /* muldi */
+ COSTS_N_INSNS (22), /* divsi */
+ COSTS_N_INSNS (28), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (13), /* sdiv */
+ COSTS_N_INSNS (16), /* ddiv */
+};
+
\f
static bool rs6000_function_ok_for_sibcall (tree, tree);
static const char *rs6000_invalid_within_doloop (rtx);
static int rs6000_variable_issue (FILE *, int, rtx, int);
static bool rs6000_rtx_costs (rtx, int, int, int *);
static int rs6000_adjust_cost (rtx, rtx, rtx, int);
+static void rs6000_sched_init (FILE *, int, int);
static bool is_microcoded_insn (rtx);
-static int is_dispatch_slot_restricted (rtx);
+static bool is_nonpipeline_insn (rtx);
static bool is_cracked_insn (rtx);
static bool is_branch_slot_insn (rtx);
+static bool is_load_insn (rtx);
+static rtx get_store_dest (rtx pat);
+static bool is_store_insn (rtx);
+static bool set_to_load_agen (rtx,rtx);
+static bool adjacent_mem_locations (rtx,rtx);
static int rs6000_adjust_priority (rtx, int);
static int rs6000_issue_rate (void);
-static bool rs6000_is_costly_dependence (rtx, rtx, rtx, int, int);
+static bool rs6000_is_costly_dependence (dep_t, int, int);
static rtx get_next_active_insn (rtx, rtx);
static bool insn_terminates_group_p (rtx , enum group_termination);
+static bool insn_must_be_first_in_group (rtx);
+static bool insn_must_be_last_in_group (rtx);
static bool is_costly_group (rtx *, rtx);
static int force_new_group (int, FILE *, rtx *, rtx, bool *, int, int *);
static int redefine_groups (FILE *, int, rtx, rtx);
static int pad_groups (FILE *, int, rtx, rtx);
static void rs6000_sched_finish (FILE *, int);
+static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
+static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
static int rs6000_use_sched_lookahead (void);
+static int rs6000_use_sched_lookahead_guard (rtx);
static tree rs6000_builtin_mask_for_load (void);
+static tree rs6000_builtin_mul_widen_even (tree);
+static tree rs6000_builtin_mul_widen_odd (tree);
+static tree rs6000_builtin_conversion (enum tree_code, tree);
static void def_builtin (int, const char *, tree, int);
static void rs6000_init_builtins (void);
static const char *invalid_arg_for_unprototyped_fn (tree, tree, tree);
#if TARGET_MACHO
static void macho_branch_islands (void);
-static void add_compiler_branch_island (tree, tree, int);
static int no_previous_def (tree function_name);
static tree get_prev_label (tree function_name);
static void rs6000_darwin_file_start (void);
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER rs6000_sched_reorder
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
+
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN rs6000_builtin_mul_widen_even
+#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
+#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD rs6000_builtin_mul_widen_odd
+#undef TARGET_VECTORIZE_BUILTIN_CONVERSION
+#define TARGET_VECTORIZE_BUILTIN_CONVERSION rs6000_builtin_conversion
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
{"801", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
{"821", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
{"823", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
- {"8540", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"8540", PROCESSOR_PPC8540,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_STRICT_ALIGN},
/* 8548 has a dummy entry for now. */
- {"8548", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
+ {"8548", PROCESSOR_PPC8540,
+ POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_STRICT_ALIGN},
{"860", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
{"970", PROCESSOR_POWER4,
POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
+ {"cell", PROCESSOR_CELL,
+ POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64},
{"common", PROCESSOR_COMMON, MASK_NEW_MNEMONICS},
{"ec603e", PROCESSOR_PPC603, POWERPC_BASE_MASK | MASK_SOFT_FLOAT},
{"G3", PROCESSOR_PPC750, POWERPC_BASE_MASK | MASK_PPC_GFXOPT},
{"power5+", PROCESSOR_POWER5,
POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GFXOPT
| MASK_MFCRF | MASK_POPCNTB | MASK_FPRND},
- {"power6", PROCESSOR_POWER5,
+ {"power6", PROCESSOR_POWER6,
POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB
| MASK_FPRND},
+ {"power6x", PROCESSOR_POWER6,
+ POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB
+ | MASK_FPRND | MASK_MFPGPR},
{"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK},
{"powerpc64", PROCESSOR_POWERPC64,
POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64},
enum {
POWER_MASKS = MASK_POWER | MASK_POWER2 | MASK_MULTIPLE | MASK_STRING,
- POWERPC_MASKS = (POWERPC_BASE_MASK | MASK_PPC_GPOPT
+ POWERPC_MASKS = (POWERPC_BASE_MASK | MASK_PPC_GPOPT | MASK_STRICT_ALIGN
| MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC
| MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW
- | MASK_DLMZB)
+ | MASK_DLMZB | MASK_MFPGPR)
};
rs6000_init_hard_regno_mode_ok ();
if (TARGET_E500)
{
- if (TARGET_ALTIVEC)
- error ("AltiVec and E500 instructions cannot coexist");
-
/* The e500 does not have string instructions, and we set
MASK_STRING above when optimizing for size. */
if ((target_flags & MASK_STRING) != 0)
rs6000_float_gprs = 0;
if (!rs6000_explicit_options.isel)
rs6000_isel = 0;
- if (!rs6000_explicit_options.long_double)
- rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
}
+ /* Detect invalid option combinations with E500. */
+ CHECK_E500_OPTIONS;
+
rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
- && rs6000_cpu != PROCESSOR_POWER5);
+ && rs6000_cpu != PROCESSOR_POWER5
+ && rs6000_cpu != PROCESSOR_POWER6
+ && rs6000_cpu != PROCESSOR_CELL);
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5);
+ rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
+ || rs6000_cpu == PROCESSOR_POWER5
+ || rs6000_cpu == PROCESSOR_POWER6);
rs6000_sched_restricted_insns_priority
= (rs6000_sched_groups ? 1 : 0);
/* Set branch target alignment, if not optimizing for size. */
if (!optimize_size)
{
- if (rs6000_sched_groups)
+ /* Cell wants to be aligned 8byte for dual issue. */
+ if (rs6000_cpu == PROCESSOR_CELL)
+ {
+ if (align_functions <= 0)
+ align_functions = 8;
+ if (align_jumps <= 0)
+ align_jumps = 8;
+ if (align_loops <= 0)
+ align_loops = 8;
+ }
+ if (rs6000_align_branch_targets)
{
if (align_functions <= 0)
align_functions = 16;
rs6000_cost = &ppc630_cost;
break;
+ case PROCESSOR_CELL:
+ rs6000_cost = &ppccell_cost;
+ break;
+
case PROCESSOR_PPC750:
case PROCESSOR_PPC7400:
rs6000_cost = &ppc750_cost;
rs6000_cost = &power4_cost;
break;
+ case PROCESSOR_POWER6:
+ rs6000_cost = &power6_cost;
+ break;
+
default:
gcc_unreachable ();
}
return 0;
}
+/* Implement targetm.vectorize.builtin_conversion. */
+static tree
+rs6000_builtin_conversion (enum tree_code code, tree type)
+{
+ if (!TARGET_ALTIVEC)
+ return NULL_TREE;
+
+ switch (code)
+ {
+ case FLOAT_EXPR:
+ switch (TYPE_MODE (type))
+ {
+ case V4SImode:
+ return TYPE_UNSIGNED (type) ?
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VCFUX] :
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VCFSX];
+ default:
+ return NULL_TREE;
+ }
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* Implement targetm.vectorize.builtin_mul_widen_even. */
+static tree
+rs6000_builtin_mul_widen_even (tree type)
+{
+ if (!TARGET_ALTIVEC)
+ return NULL_TREE;
+
+ switch (TYPE_MODE (type))
+ {
+ case V8HImode:
+ return TYPE_UNSIGNED (type) ?
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUH] :
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESH];
+
+ case V16QImode:
+ return TYPE_UNSIGNED (type) ?
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUB] :
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESB];
+ default:
+ return NULL_TREE;
+ }
+}
+
+/* Implement targetm.vectorize.builtin_mul_widen_odd. */
+static tree
+rs6000_builtin_mul_widen_odd (tree type)
+{
+ if (!TARGET_ALTIVEC)
+ return NULL_TREE;
+
+ switch (TYPE_MODE (type))
+ {
+ case V8HImode:
+ return TYPE_UNSIGNED (type) ?
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUH] :
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSH];
+
+ case V16QImode:
+ return TYPE_UNSIGNED (type) ?
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUB] :
+ rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSB];
+ default:
+ return NULL_TREE;
+ }
+}
+
/* Handle generic options of the form -mfoo=yes/no.
NAME is the option name.
VALUE is the option value.
case OPT_mspe_:
rs6000_explicit_options.spe = true;
rs6000_parse_yes_no_option ("spe", arg, &(rs6000_spe));
- /* No SPE means 64-bit long doubles, even if an E500. */
- if (!rs6000_spe)
- rs6000_long_double_type_size = 64;
break;
case OPT_mdebug_:
bool
invalid_e500_subreg (rtx op, enum machine_mode mode)
{
- /* Reject (subreg:SI (reg:DF)). */
- if (GET_CODE (op) == SUBREG
- && mode == SImode
- && REG_P (SUBREG_REG (op))
- && GET_MODE (SUBREG_REG (op)) == DFmode)
- return true;
+ if (TARGET_E500_DOUBLE)
+ {
+ /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
+ subreg:TI and reg:TF. */
+ if (GET_CODE (op) == SUBREG
+ && (mode == SImode || mode == DImode || mode == TImode)
+ && REG_P (SUBREG_REG (op))
+ && (GET_MODE (SUBREG_REG (op)) == DFmode
+ || GET_MODE (SUBREG_REG (op)) == TFmode))
+ return true;
+
+ /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
+ reg:TI. */
+ if (GET_CODE (op) == SUBREG
+ && (mode == DFmode || mode == TFmode)
+ && REG_P (SUBREG_REG (op))
+ && (GET_MODE (SUBREG_REG (op)) == DImode
+ || GET_MODE (SUBREG_REG (op)) == TImode))
+ return true;
+ }
- /* Reject (subreg:DF (reg:DI)). */
- if (GET_CODE (op) == SUBREG
- && mode == DFmode
+ if (TARGET_SPE
+ && GET_CODE (op) == SUBREG
+ && mode == SImode
&& REG_P (SUBREG_REG (op))
- && GET_MODE (SUBREG_REG (op)) == DImode)
+ && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
return true;
return false;
}
-/* Darwin, AIX increases natural record alignment to doubleword if the first
+/* AIX increases natural record alignment to doubleword if the first
field is an FP double while the FP fields remain word aligned. */
unsigned int
return align;
}
+/* Darwin increases record alignment to the natural alignment of
+ the first field. */
+
+unsigned int
+darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
+ unsigned int specified)
+{
+ unsigned int align = MAX (computed, specified);
+
+ if (TYPE_PACKED (type))
+ return align;
+
+ /* Find the first field, looking down into aggregates. */
+ do {
+ tree field = TYPE_FIELDS (type);
+ /* Skip all non field decls */
+ while (field != NULL && TREE_CODE (field) != FIELD_DECL)
+ field = TREE_CHAIN (field);
+ if (! field)
+ break;
+ type = TREE_TYPE (field);
+ while (TREE_CODE (type) == ARRAY_TYPE)
+ type = TREE_TYPE (type);
+ } while (AGGREGATE_TYPE_P (type));
+
+ if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
+ align = MAX (align, TYPE_ALIGN (type));
+
+ return align;
+}
+
/* Return 1 for an operand in small memory on V.4/eabi. */
int
break;
case TFmode:
+ if (TARGET_E500_DOUBLE)
+ return (SPE_CONST_OFFSET_OK (offset)
+ && SPE_CONST_OFFSET_OK (offset + 8));
+
case TImode:
if (mode == TFmode || !TARGET_POWERPC64)
extra = 12;
if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
return false;
/* Restrict addressing for DI because of our SUBREG hackery. */
- if (TARGET_E500_DOUBLE && (mode == DFmode || mode == DImode))
+ if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
+ || mode == DImode))
return false;
x = XEXP (x, 1);
return reg;
}
else if (SPE_VECTOR_MODE (mode)
- || (TARGET_E500_DOUBLE && (mode == DFmode
+ || (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DImode)))
{
if (mode == DImode)
emit_move_insn (tmp2, mem);
emit_insn (gen_addsi3 (tmp3, tmp1, tmp2));
last = emit_move_insn (got, tmp3);
- REG_NOTES (last) = gen_rtx_EXPR_LIST (REG_EQUAL, gsym,
- REG_NOTES (last));
+ set_unique_reg_note (last, REG_EQUAL, gsym);
REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last,
REG_NOTES (first));
REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first,
&& REG_MODE_OK_FOR_BASE_P (XEXP (x, 0), mode)
&& GET_CODE (XEXP (x, 1)) == CONST_INT
&& !SPE_VECTOR_MODE (mode)
- && !(TARGET_E500_DOUBLE && (mode == DFmode
+ && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
|| mode == DImode))
&& !ALTIVEC_VECTOR_MODE (mode))
{
&& !SPE_VECTOR_MODE (mode)
&& mode != TFmode
/* Restrict addressing for DI because of our SUBREG hackery. */
- && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == DImode))
+ && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
+ || mode == DImode))
&& TARGET_UPDATE
&& legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
return 1;
case LO_SUM:
return true;
- case PRE_INC:
- case PRE_DEC:
- return TARGET_UPDATE;
+ /* Auto-increment cases are now treated generically in recog.c. */
default:
break;
if (FP_REGNO_P (regno))
return (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
- if (TARGET_E500_DOUBLE && mode == DFmode)
- return 1;
-
if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
return (GET_MODE_SIZE (mode) + UNITS_PER_SPE_WORD - 1) / UNITS_PER_SPE_WORD;
return
(GET_MODE_SIZE (mode) + UNITS_PER_ALTIVEC_WORD - 1) / UNITS_PER_ALTIVEC_WORD;
+ /* The value returned for SCmode in the E500 double case is 2 for
+ ABI compatibility; storing an SCmode value in a single register
+ would require function_arg and rs6000_spe_function_arg to handle
+ SCmode so as to pass the value correctly in a pair of
+ registers. */
+ if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode)
+ return (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
+
return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
}
case SImode:
result = no_new_pseudos ? dest : gen_reg_rtx (SImode);
- emit_insn (gen_rtx_SET (VOIDmode, result,
+ emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (result),
GEN_INT (INTVAL (source)
& (~ (HOST_WIDE_INT) 0xffff))));
emit_insn (gen_rtx_SET (VOIDmode, dest,
- gen_rtx_IOR (SImode, result,
+ gen_rtx_IOR (SImode, copy_rtx (result),
GEN_INT (INTVAL (source) & 0xffff))));
result = dest;
break;
operand1 = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0,
DImode);
- operand2 = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
+ operand2 = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN != 0,
DImode);
emit_move_insn (operand1, GEN_INT (c1));
emit_move_insn (operand2, GEN_INT (c2));
else
emit_move_insn (dest, GEN_INT (ud2 << 16));
if (ud1 != 0)
- emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud1)));
}
else if ((ud4 == 0xffff && (ud3 & 0x8000))
|| (ud4 == 0 && ! (ud3 & 0x8000)))
emit_move_insn (dest, GEN_INT (ud3 << 16));
if (ud2 != 0)
- emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2)));
- emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (16)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud2)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+ GEN_INT (16)));
if (ud1 != 0)
- emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud1)));
}
else
{
emit_move_insn (dest, GEN_INT (ud4 << 16));
if (ud3 != 0)
- emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud3)));
- emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_ASHIFT (DImode, copy_rtx (dest),
+ GEN_INT (32)));
if (ud2 != 0)
- emit_move_insn (dest, gen_rtx_IOR (DImode, dest,
- GEN_INT (ud2 << 16)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest),
+ GEN_INT (ud2 << 16)));
if (ud1 != 0)
- emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1)));
+ emit_move_insn (copy_rtx (dest),
+ gen_rtx_IOR (DImode, copy_rtx (dest), GEN_INT (ud1)));
}
}
return dest;
{
emit_move_insn (adjust_address (operands[0], SImode, 0),
adjust_address (operands[1], SImode, 0));
- emit_move_insn (adjust_address (operands[0], SImode, 4),
- adjust_address (operands[1], SImode, 4));
+ emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
+ adjust_address (copy_rtx (operands[1]), SImode, 4));
return;
}
if (FP_REGNO_P (regnum) || regnum >= FIRST_PSEUDO_REGISTER)
{
rtx newreg;
- newreg = (no_new_pseudos ? operands[1] : gen_reg_rtx (mode));
+ newreg = (no_new_pseudos ? copy_rtx (operands[1])
+ : gen_reg_rtx (mode));
emit_insn (gen_aux_truncdfsf2 (newreg, operands[1]));
operands[1] = newreg;
}
/* 128-bit constant floating-point values on Darwin should really be
loaded as two parts. */
- if (!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128
+ if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
&& mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
{
/* DImode is used, not DFmode, because simplify_gen_subreg doesn't
know how to get a DFmode SUBREG of a TFmode. */
- rs6000_emit_move (simplify_gen_subreg (DImode, operands[0], mode, 0),
- simplify_gen_subreg (DImode, operands[1], mode, 0),
- DImode);
- rs6000_emit_move (simplify_gen_subreg (DImode, operands[0], mode,
- GET_MODE_SIZE (DImode)),
- simplify_gen_subreg (DImode, operands[1], mode,
- GET_MODE_SIZE (DImode)),
- DImode);
+ enum machine_mode imode = (TARGET_E500_DOUBLE ? DFmode : DImode);
+ rs6000_emit_move (simplify_gen_subreg (imode, operands[0], mode, 0),
+ simplify_gen_subreg (imode, operands[1], mode, 0),
+ imode);
+ rs6000_emit_move (simplify_gen_subreg (imode, operands[0], mode,
+ GET_MODE_SIZE (imode)),
+ simplify_gen_subreg (imode, operands[1], mode,
+ GET_MODE_SIZE (imode)),
+ imode);
return;
}
static rtx
spe_build_register_parallel (enum machine_mode mode, int gregno)
{
- rtx r1, r3;
+ rtx r1, r3, r5, r7;
switch (mode)
{
return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
case DCmode:
+ case TFmode:
r1 = gen_rtx_REG (DImode, gregno);
r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
r3 = gen_rtx_REG (DImode, gregno + 2);
r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
+ case TCmode:
+ r1 = gen_rtx_REG (DImode, gregno);
+ r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
+ r3 = gen_rtx_REG (DImode, gregno + 2);
+ r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
+ r5 = gen_rtx_REG (DImode, gregno + 4);
+ r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
+ r7 = gen_rtx_REG (DImode, gregno + 6);
+ r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
+ return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
+
default:
gcc_unreachable ();
}
/* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
are passed and returned in a pair of GPRs for ABI compatibility. */
- if (TARGET_E500_DOUBLE && (mode == DFmode || mode == DCmode))
+ if (TARGET_E500_DOUBLE && (mode == DFmode || mode == DCmode
+ || mode == TFmode || mode == TCmode))
{
int n_words = rs6000_arg_size (mode, type);
else if (TARGET_SPE_ABI && TARGET_SPE
&& (SPE_VECTOR_MODE (mode)
|| (TARGET_E500_DOUBLE && (mode == DFmode
- || mode == DCmode))))
+ || mode == DCmode
+ || mode == TFmode
+ || mode == TCmode))))
return rs6000_spe_function_arg (cum, mode, type);
else if (abi == ABI_V4)
if (cfun->va_list_gpr_size)
{
- t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
+ t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr,
build_int_cst (NULL_TREE, n_gpr));
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
if (cfun->va_list_fpr_size)
{
- t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
+ t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr,
build_int_cst (NULL_TREE, n_fpr));
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
if (words != 0)
t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t,
build_int_cst (NULL_TREE, words * UNITS_PER_WORD));
- t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
+ t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
if (cfun->machine->varargs_save_offset)
t = build2 (PLUS_EXPR, TREE_TYPE (sav), t,
build_int_cst (NULL_TREE, cfun->machine->varargs_save_offset));
- t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
+ t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (sav), sav, t);
TREE_SIDE_EFFECTS (t) = 1;
expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
u = build2 (MULT_EXPR, integer_type_node, u, size_int (sav_scale));
t = build2 (PLUS_EXPR, ptr_type_node, t, u);
- t = build2 (MODIFY_EXPR, void_type_node, addr, t);
+ t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
gimplify_and_add (t, pre_p);
t = build1 (GOTO_EXPR, void_type_node, lab_over);
{
/* Ensure that we don't find any more args in regs.
Alignment has taken care of the n_reg == 2 gpr case. */
- t = build2 (MODIFY_EXPR, TREE_TYPE (reg), reg, size_int (8));
+ t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (reg), reg, size_int (8));
gimplify_and_add (t, pre_p);
}
}
}
gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
- u = build2 (MODIFY_EXPR, void_type_node, addr, t);
+ u = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
gimplify_and_add (u, pre_p);
t = build2 (PLUS_EXPR, TREE_TYPE (t), t, size_int (size));
- t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
+ t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
gimplify_and_add (t, pre_p);
if (lab_over)
append_to_statement_list (t, pre_p);
}
+ if (STRICT_ALIGNMENT
+ && (TYPE_ALIGN (type)
+ > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
+ {
+ /* The value (of type complex double, for example) may not be
+ aligned in memory in the saved registers, so copy via a
+ temporary. (This is the same code as used for SPARC.) */
+ tree tmp = create_tmp_var (type, "va_arg_tmp");
+ tree dest_addr = build_fold_addr_expr (tmp);
+
+ tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY],
+ 3, dest_addr, addr, size_int (rsize * 4));
+
+ gimplify_and_add (copy, pre_p);
+ addr = dest_addr;
+ }
+
addr = fold_convert (ptrtype, addr);
return build_va_arg_indirect_ref (addr);
}
abort ();
rs6000_builtin_decls[code] =
- lang_hooks.builtin_function (name, type, code, BUILT_IN_MD,
- NULL, NULL_TREE);
+ add_builtin_function (name, type, code, BUILT_IN_MD,
+ NULL, NULL_TREE);
}
}
};
static rtx
-rs6000_expand_unop_builtin (enum insn_code icode, tree arglist, rtx target)
+rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
{
rtx pat;
- tree arg0 = TREE_VALUE (arglist);
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
rtx op0 = expand_normal (arg0);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
}
static rtx
-altivec_expand_abs_builtin (enum insn_code icode, tree arglist, rtx target)
+altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
{
rtx pat, scratch1, scratch2;
- tree arg0 = TREE_VALUE (arglist);
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
rtx op0 = expand_normal (arg0);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
}
static rtx
-rs6000_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
+rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
{
rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
static rtx
altivec_expand_predicate_builtin (enum insn_code icode, const char *opcode,
- tree arglist, rtx target)
+ tree exp, rtx target)
{
rtx pat, scratch;
- tree cr6_form = TREE_VALUE (arglist);
- tree arg0 = TREE_VALUE (TREE_CHAIN (arglist));
- tree arg1 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ tree cr6_form = CALL_EXPR_ARG (exp, 0);
+ tree arg0 = CALL_EXPR_ARG (exp, 1);
+ tree arg1 = CALL_EXPR_ARG (exp, 2);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
enum machine_mode tmode = SImode;
}
static rtx
-altivec_expand_lv_builtin (enum insn_code icode, tree arglist, rtx target)
+altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
{
rtx pat, addr;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
enum machine_mode tmode = insn_data[icode].operand[0].mode;
enum machine_mode mode0 = Pmode;
enum machine_mode mode1 = Pmode;
}
static rtx
-spe_expand_stv_builtin (enum insn_code icode, tree arglist)
+spe_expand_stv_builtin (enum insn_code icode, tree exp)
{
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
}
static rtx
-altivec_expand_stv_builtin (enum insn_code icode, tree arglist)
+altivec_expand_stv_builtin (enum insn_code icode, tree exp)
{
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
}
static rtx
-rs6000_expand_ternop_builtin (enum insn_code icode, tree arglist, rtx target)
+rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
{
rtx pat;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
static rtx
altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
{
- tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
- tree arglist = TREE_OPERAND (exp, 1);
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
tree arg0;
enum machine_mode tmode, mode0;
*expandedp = true;
- arg0 = TREE_VALUE (arglist);
+ arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
tmode = insn_data[icode].operand[0].mode;
mode0 = insn_data[icode].operand[1].mode;
altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
bool *expandedp)
{
- tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
- tree arglist = TREE_OPERAND (exp, 1);
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
tree arg0, arg1;
enum machine_mode mode0, mode1;
return NULL_RTX;
}
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
mode0 = insn_data[icode].operand[0].mode;
altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
bool *expandedp)
{
- tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
- tree arglist = TREE_OPERAND (exp, 1);
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
tree arg0, arg1, arg2;
enum machine_mode mode0, mode1, mode2;
for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
if (d->code == fcode)
{
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
/* Expand vec_init builtin. */
static rtx
-altivec_expand_vec_init_builtin (tree type, tree arglist, rtx target)
+altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
{
enum machine_mode tmode = TYPE_MODE (type);
enum machine_mode inner_mode = GET_MODE_INNER (tmode);
rtvec v = rtvec_alloc (n_elt);
gcc_assert (VECTOR_MODE_P (tmode));
-
- for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
+ gcc_assert (n_elt == call_expr_nargs (exp));
+
+ for (i = 0; i < n_elt; ++i)
{
- rtx x = expand_normal (TREE_VALUE (arglist));
+ rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
}
- gcc_assert (arglist == NULL);
-
if (!target || !register_operand (target, tmode))
target = gen_reg_rtx (tmode);
/* Expand vec_set builtin. */
static rtx
-altivec_expand_vec_set_builtin (tree arglist)
+altivec_expand_vec_set_builtin (tree exp)
{
enum machine_mode tmode, mode1;
tree arg0, arg1, arg2;
int elt;
rtx op0, op1;
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
+ arg2 = CALL_EXPR_ARG (exp, 2);
tmode = TYPE_MODE (TREE_TYPE (arg0));
mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
/* Expand vec_ext builtin. */
static rtx
-altivec_expand_vec_ext_builtin (tree arglist, rtx target)
+altivec_expand_vec_ext_builtin (tree exp, rtx target)
{
enum machine_mode tmode, mode0;
tree arg0, arg1;
int elt;
rtx op0;
- arg0 = TREE_VALUE (arglist);
- arg1 = TREE_VALUE (TREE_CHAIN (arglist));
+ arg0 = CALL_EXPR_ARG (exp, 0);
+ arg1 = CALL_EXPR_ARG (exp, 1);
op0 = expand_normal (arg0);
elt = get_element_number (TREE_TYPE (arg0), arg1);
struct builtin_description_predicates *dp;
size_t i;
enum insn_code icode;
- tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
- tree arglist = TREE_OPERAND (exp, 1);
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
tree arg0;
rtx op0, pat;
enum machine_mode tmode, mode0;
switch (fcode)
{
case ALTIVEC_BUILTIN_STVX:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx, arglist);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx, exp);
case ALTIVEC_BUILTIN_STVEBX:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, arglist);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
case ALTIVEC_BUILTIN_STVEHX:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, arglist);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
case ALTIVEC_BUILTIN_STVEWX:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, arglist);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
case ALTIVEC_BUILTIN_STVXL:
- return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, arglist);
+ return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, exp);
case ALTIVEC_BUILTIN_MFVSCR:
icode = CODE_FOR_altivec_mfvscr;
case ALTIVEC_BUILTIN_MTVSCR:
icode = CODE_FOR_altivec_mtvscr;
- arg0 = TREE_VALUE (arglist);
+ arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
mode0 = insn_data[icode].operand[0].mode;
case ALTIVEC_BUILTIN_DSS:
icode = CODE_FOR_altivec_dss;
- arg0 = TREE_VALUE (arglist);
+ arg0 = CALL_EXPR_ARG (exp, 0);
STRIP_NOPS (arg0);
op0 = expand_normal (arg0);
mode0 = insn_data[icode].operand[0].mode;
case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
- return altivec_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
case ALTIVEC_BUILTIN_VEC_SET_V4SI:
case ALTIVEC_BUILTIN_VEC_SET_V8HI:
case ALTIVEC_BUILTIN_VEC_SET_V16QI:
case ALTIVEC_BUILTIN_VEC_SET_V4SF:
- return altivec_expand_vec_set_builtin (arglist);
+ return altivec_expand_vec_set_builtin (exp);
case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
- return altivec_expand_vec_ext_builtin (arglist, target);
+ return altivec_expand_vec_ext_builtin (exp, target);
default:
break;
d = (struct builtin_description *) bdesc_abs;
for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
if (d->code == fcode)
- return altivec_expand_abs_builtin (d->icode, arglist, target);
+ return altivec_expand_abs_builtin (d->icode, exp, target);
/* Expand the AltiVec predicates. */
dp = (struct builtin_description_predicates *) bdesc_altivec_preds;
for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, dp++)
if (dp->code == fcode)
return altivec_expand_predicate_builtin (dp->icode, dp->opcode,
- arglist, target);
+ exp, target);
/* LV* are funky. We initialized them differently. */
switch (fcode)
{
case ALTIVEC_BUILTIN_LVSL:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
- arglist, target);
+ exp, target);
case ALTIVEC_BUILTIN_LVSR:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
- arglist, target);
+ exp, target);
case ALTIVEC_BUILTIN_LVEBX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
- arglist, target);
+ exp, target);
case ALTIVEC_BUILTIN_LVEHX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
- arglist, target);
+ exp, target);
case ALTIVEC_BUILTIN_LVEWX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
- arglist, target);
+ exp, target);
case ALTIVEC_BUILTIN_LVXL:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl,
- arglist, target);
+ exp, target);
case ALTIVEC_BUILTIN_LVX:
return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx,
- arglist, target);
+ exp, target);
default:
break;
/* Fall through. */
static rtx
spe_expand_builtin (tree exp, rtx target, bool *expandedp)
{
- tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
- tree arglist = TREE_OPERAND (exp, 1);
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
tree arg1, arg0;
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
enum insn_code icode;
case SPE_BUILTIN_EVSTWHO:
case SPE_BUILTIN_EVSTWWE:
case SPE_BUILTIN_EVSTWWO:
- arg1 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ arg1 = CALL_EXPR_ARG (exp, 2);
if (TREE_CODE (arg1) != INTEGER_CST
|| TREE_INT_CST_LOW (arg1) & ~0x1f)
{
{
case SPE_BUILTIN_EVSPLATFI:
return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
- arglist, target);
+ exp, target);
case SPE_BUILTIN_EVSPLATI:
return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
- arglist, target);
+ exp, target);
default:
break;
}
d = (struct builtin_description *) bdesc_2arg_spe;
for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
if (d->code == fcode)
- return rs6000_expand_binop_builtin (d->icode, arglist, target);
+ return rs6000_expand_binop_builtin (d->icode, exp, target);
d = (struct builtin_description *) bdesc_spe_predicates;
for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
if (d->code == fcode)
- return spe_expand_predicate_builtin (d->icode, arglist, target);
+ return spe_expand_predicate_builtin (d->icode, exp, target);
d = (struct builtin_description *) bdesc_spe_evsel;
for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
if (d->code == fcode)
- return spe_expand_evsel_builtin (d->icode, arglist, target);
+ return spe_expand_evsel_builtin (d->icode, exp, target);
switch (fcode)
{
case SPE_BUILTIN_EVSTDDX:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
case SPE_BUILTIN_EVSTDHX:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
case SPE_BUILTIN_EVSTDWX:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
case SPE_BUILTIN_EVSTWHEX:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
case SPE_BUILTIN_EVSTWHOX:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
case SPE_BUILTIN_EVSTWWEX:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
case SPE_BUILTIN_EVSTWWOX:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
case SPE_BUILTIN_EVSTDD:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
case SPE_BUILTIN_EVSTDH:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
case SPE_BUILTIN_EVSTDW:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
case SPE_BUILTIN_EVSTWHE:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
case SPE_BUILTIN_EVSTWHO:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
case SPE_BUILTIN_EVSTWWE:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
case SPE_BUILTIN_EVSTWWO:
- return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, arglist);
+ return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
case SPE_BUILTIN_MFSPEFSCR:
icode = CODE_FOR_spe_mfspefscr;
tmode = insn_data[icode].operand[0].mode;
return target;
case SPE_BUILTIN_MTSPEFSCR:
icode = CODE_FOR_spe_mtspefscr;
- arg0 = TREE_VALUE (arglist);
+ arg0 = CALL_EXPR_ARG (exp, 0);
op0 = expand_normal (arg0);
mode0 = insn_data[icode].operand[0].mode;
}
static rtx
-spe_expand_predicate_builtin (enum insn_code icode, tree arglist, rtx target)
+spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
{
rtx pat, scratch, tmp;
- tree form = TREE_VALUE (arglist);
- tree arg0 = TREE_VALUE (TREE_CHAIN (arglist));
- tree arg1 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
+ tree form = CALL_EXPR_ARG (exp, 0);
+ tree arg0 = CALL_EXPR_ARG (exp, 1);
+ tree arg1 = CALL_EXPR_ARG (exp, 2);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
*/
static rtx
-spe_expand_evsel_builtin (enum insn_code icode, tree arglist, rtx target)
+spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
{
rtx pat, scratch;
- tree arg0 = TREE_VALUE (arglist);
- tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
- tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
- tree arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree arg1 = CALL_EXPR_ARG (exp, 1);
+ tree arg2 = CALL_EXPR_ARG (exp, 2);
+ tree arg3 = CALL_EXPR_ARG (exp, 3);
rtx op0 = expand_normal (arg0);
rtx op1 = expand_normal (arg1);
rtx op2 = expand_normal (arg2);
enum machine_mode mode ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
- tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
- tree arglist = TREE_OPERAND (exp, 1);
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
struct builtin_description *d;
size_t i;
gcc_assert (TARGET_ALTIVEC);
- arg = TREE_VALUE (arglist);
+ arg = CALL_EXPR_ARG (exp, 0);
gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
addr = memory_address (mode, op);
return target;
}
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (fcode == ALTIVEC_BUILTIN_VCFUX
+ || fcode == ALTIVEC_BUILTIN_VCFSX)
+ {
+ if (call_expr_nargs (exp) == 1)
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ }
+
if (TARGET_ALTIVEC)
{
ret = altivec_expand_builtin (exp, target, &success);
d = (struct builtin_description *) bdesc_1arg;
for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
if (d->code == fcode)
- return rs6000_expand_unop_builtin (d->icode, arglist, target);
+ return rs6000_expand_unop_builtin (d->icode, exp, target);
/* Handle simple binary operations. */
d = (struct builtin_description *) bdesc_2arg;
for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
if (d->code == fcode)
- return rs6000_expand_binop_builtin (d->icode, arglist, target);
+ return rs6000_expand_binop_builtin (d->icode, exp, target);
/* Handle simple ternary operations. */
d = (struct builtin_description *) bdesc_3arg;
for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
if (d->code == fcode)
- return rs6000_expand_ternop_builtin (d->icode, arglist, target);
+ return rs6000_expand_ternop_builtin (d->icode, exp, target);
gcc_unreachable ();
}
/* Initialize target builtin that implements
targetm.vectorize.builtin_mask_for_load. */
- decl = lang_hooks.builtin_function ("__builtin_altivec_mask_for_load",
- v16qi_ftype_long_pcvoid,
- ALTIVEC_BUILTIN_MASK_FOR_LOAD,
- BUILT_IN_MD, NULL,
- tree_cons (get_identifier ("const"),
- NULL_TREE, NULL_TREE));
+ decl = add_builtin_function ("__builtin_altivec_mask_for_load",
+ v16qi_ftype_long_pcvoid,
+ ALTIVEC_BUILTIN_MASK_FOR_LOAD,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ TREE_READONLY (decl) = 1;
/* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
altivec_builtin_mask_for_load = decl;
}
static void
rs6000_init_libfuncs (void)
{
- if (!TARGET_HARD_FLOAT)
- return;
-
if (DEFAULT_ABI != ABI_V4 && TARGET_XCOFF
&& !TARGET_POWER2 && !TARGET_POWERPC)
{
set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
+
+ if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
+ {
+ set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
+ set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
+ set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
+ set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
+ set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
+ set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
+ set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
+ set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
+
+ set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
+ set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
+ set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
+ set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
+ set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
+ set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
+ set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
+ set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
+ }
}
else
{
tmp = XEXP (x, 0);
/* Ugly hack because %y is overloaded. */
- if (TARGET_E500 && GET_MODE_SIZE (GET_MODE (x)) == 8)
+ if ((TARGET_SPE || TARGET_E500_DOUBLE)
+ && (GET_MODE_SIZE (GET_MODE (x)) == 8
+ || GET_MODE (x) == TFmode
+ || GET_MODE (x) == TImode))
{
/* Handle [reg]. */
if (GET_CODE (tmp) == REG)
compare_result = gen_reg_rtx (comp_mode);
/* E500 FP compare instructions on the GPRs. Yuck! */
- if ((TARGET_E500 && !TARGET_FPRS && TARGET_HARD_FLOAT)
+ if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
&& rs6000_compare_fp_p)
{
rtx cmp, or_result, compare_result2;
rs6000_compare_op1);
break;
+ case TFmode:
+ cmp = flag_unsafe_math_optimizations
+ ? gen_tsttfeq_gpr (compare_result, rs6000_compare_op0,
+ rs6000_compare_op1)
+ : gen_cmptfeq_gpr (compare_result, rs6000_compare_op0,
+ rs6000_compare_op1);
+ break;
+
default:
gcc_unreachable ();
}
rs6000_compare_op1);
break;
+ case TFmode:
+ cmp = flag_unsafe_math_optimizations
+ ? gen_tsttfgt_gpr (compare_result, rs6000_compare_op0,
+ rs6000_compare_op1)
+ : gen_cmptfgt_gpr (compare_result, rs6000_compare_op0,
+ rs6000_compare_op1);
+ break;
+
default:
gcc_unreachable ();
}
rs6000_compare_op1);
break;
+ case TFmode:
+ cmp = flag_unsafe_math_optimizations
+ ? gen_tsttflt_gpr (compare_result, rs6000_compare_op0,
+ rs6000_compare_op1)
+ : gen_cmptflt_gpr (compare_result, rs6000_compare_op0,
+ rs6000_compare_op1);
+ break;
+
default:
gcc_unreachable ();
}
rs6000_compare_op1);
break;
+ case TFmode:
+ cmp = flag_unsafe_math_optimizations
+ ? gen_tsttfeq_gpr (compare_result2, rs6000_compare_op0,
+ rs6000_compare_op1)
+ : gen_cmptfeq_gpr (compare_result2, rs6000_compare_op0,
+ rs6000_compare_op1);
+ break;
+
default:
gcc_unreachable ();
}
under flag_finite_math_only we don't bother. */
if (rs6000_compare_fp_p
&& !flag_finite_math_only
- && !(TARGET_HARD_FLOAT && TARGET_E500 && !TARGET_FPRS)
+ && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
&& (code == LE || code == GE
|| code == UNEQ || code == LTGT
|| code == UNGT || code == UNLT))
condition_rtx = rs6000_generate_compare (code);
cond_code = GET_CODE (condition_rtx);
- if (TARGET_E500 && rs6000_compare_fp_p
+ if (rs6000_compare_fp_p
&& !TARGET_FPRS && TARGET_HARD_FLOAT)
{
rtx t;
code = reverse_condition (code);
}
- if ((TARGET_E500 && !TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
+ if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
{
/* The efscmp/tst* instructions twiddle bit 2, which maps nicely
to the GT bit. */
try_again = true;
break;
case NE:
- /* Treat A != B as ~(A==B). */
+ case UNLE:
+ case UNLT:
+ case UNGE:
+ case UNGT:
+ /* Invert condition and try again.
+ e.g., A != B becomes ~(A==B). */
{
+ enum rtx_code rev_code;
enum insn_code nor_code;
- rtx eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1,
- dest_mode);
+ rtx eq_rtx;
+
+ rev_code = reverse_condition_maybe_unordered (rcode);
+ eq_rtx = rs6000_emit_vector_compare (rev_code, op0, op1,
+ dest_mode);
nor_code = one_cmpl_optab->handlers[(int)dest_mode].insn_code;
gcc_assert (nor_code != CODE_FOR_nothing);
return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
return 0;
}
- else if (TARGET_E500 && TARGET_HARD_FLOAT && !TARGET_FPRS
+ else if (TARGET_HARD_FLOAT && !TARGET_FPRS
&& SCALAR_FLOAT_MODE_P (compare_mode))
return 0;
reg_mode = DFmode;
else if (ALTIVEC_REGNO_P (reg))
reg_mode = V16QImode;
+ else if (TARGET_E500_DOUBLE && mode == TFmode)
+ reg_mode = DFmode;
else
reg_mode = word_mode;
reg_mode_size = GET_MODE_SIZE (reg_mode);
if (! TARGET_ALTIVEC_ABI)
return LAST_ALTIVEC_REGNO + 1;
+ /* On Darwin, the unwind routines are compiled without
+ TARGET_ALTIVEC, and use save_world to save/restore the
+ altivec registers when necessary. */
+ if (DEFAULT_ABI == ABI_DARWIN && current_function_calls_eh_return
+ && ! TARGET_ALTIVEC)
+ return FIRST_ALTIVEC_REGNO + 20;
+
/* Find lowest numbered live register. */
for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
if (regs_ever_live[i])
{
unsigned int i, mask = 0;
+ /* On Darwin, the unwind routines are compiled without
+ TARGET_ALTIVEC, and use save_world to save/restore the
+ call-saved altivec registers when necessary. */
+ if (DEFAULT_ABI == ABI_DARWIN && current_function_calls_eh_return
+ && ! TARGET_ALTIVEC)
+ mask |= 0xFFF;
+
/* First, find out if we use _any_ altivec registers. */
for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
if (regs_ever_live[i])
info_ptr->vrsave_save_offset
= info_ptr->gp_save_offset - info_ptr->vrsave_size;
- /* Align stack so vector save area is on a quadword boundary. */
+ /* Align stack so vector save area is on a quadword boundary.
+ The padding goes above the vectors. */
if (info_ptr->altivec_size != 0)
info_ptr->altivec_padding_size
- = 16 - (-info_ptr->vrsave_save_offset % 16);
+ = info_ptr->vrsave_save_offset & 0xF;
else
info_ptr->altivec_padding_size = 0;
= info_ptr->vrsave_save_offset
- info_ptr->altivec_padding_size
- info_ptr->altivec_size;
+ gcc_assert (info_ptr->altivec_size == 0
+ || info_ptr->altivec_save_offset % 16 == 0);
/* Adjust for AltiVec case. */
info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
if (SPE_VECTOR_MODE (mode))
return true;
- if (TARGET_E500_DOUBLE && mode == DFmode)
+ if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
return true;
}
}
|| cfun->machine->ra_need_lr);
/* For V.4, update stack before we do any saving and set back pointer. */
- if (info->push_p
+ if (! WORLD_SAVE_P (info)
+ && info->push_p
&& (DEFAULT_ABI == ABI_V4
|| current_function_calls_eh_return))
{
int i, j, sz;
rtx treg;
rtvec p;
+ rtx reg0;
/* save_world expects lr in r0. */
+ reg0 = gen_rtx_REG (Pmode, 0);
if (info->lr_save_p)
{
- insn = emit_move_insn (gen_rtx_REG (Pmode, 0),
+ insn = emit_move_insn (reg0,
gen_rtx_REG (Pmode, LINK_REGISTER_REGNUM));
RTX_FRAME_RELATED_P (insn) = 1;
}
&& (!current_function_calls_eh_return
|| info->ehrd_offset == -432)
&& info->vrsave_save_offset == -224
- && info->altivec_save_offset == (-224 -16 -192));
+ && info->altivec_save_offset == -416);
treg = gen_rtx_REG (SImode, 11);
emit_move_insn (treg, GEN_INT (-info->total_size));
in R11. It also clobbers R12, so beware! */
/* Preserve CR2 for save_world prologues */
- sz = 6;
+ sz = 5;
sz += 32 - info->first_gp_reg_save;
sz += 64 - info->first_fp_reg_save;
sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg);
}
- /* Prevent any attempt to delete the setting of r0 and treg! */
- RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
- RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, treg);
- RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode, sp_reg_rtx);
+ /* Explain about use of R0. */
+ if (info->lr_save_p)
+ {
+ rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
+ GEN_INT (info->lr_save_offset
+ + sp_offset));
+ rtx mem = gen_frame_mem (reg_mode, addr);
+
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg0);
+ }
+ /* Explain what happens to the stack pointer. */
+ {
+ rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
+ RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
+ }
insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
rs6000_frame_related (insn, frame_ptr_rtx, info->total_size,
- NULL_RTX, NULL_RTX);
-
- if (current_function_calls_eh_return)
- {
- unsigned int i;
- for (i = 0; ; ++i)
- {
- unsigned int regno = EH_RETURN_DATA_REGNO (i);
- if (regno == INVALID_REGNUM)
- break;
- emit_frame_save (frame_reg_rtx, frame_ptr_rtx, reg_mode, regno,
- info->ehrd_offset + sp_offset
- + reg_size * (int) i,
- info->total_size);
- }
- }
+ treg, GEN_INT (-info->total_size));
+ sp_offset = info->total_size;
}
/* Save AltiVec registers if needed. */
/* ??? There's no need to emit actual instructions here, but it's the
easiest way to get the frame unwind information emitted. */
- if (!WORLD_SAVE_P (info) && current_function_calls_eh_return)
+ if (current_function_calls_eh_return)
{
unsigned int i, regno;
rs6000_stack_t *info;
int restoring_FPRs_inline;
int using_load_multiple;
- int using_mfcr_multiple;
+ int using_mtcr_multiple;
int use_backchain_to_restore_sp;
int sp_offset = 0;
rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
use_backchain_to_restore_sp = (frame_pointer_needed
|| current_function_calls_alloca
|| info->total_size > 32767);
- using_mfcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
+ using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
|| rs6000_cpu == PROCESSOR_PPC603
|| rs6000_cpu == PROCESSOR_PPC750
|| optimize_size);
rtx r12_rtx = gen_rtx_REG (SImode, 12);
int count = 0;
- if (using_mfcr_multiple)
+ if (using_mtcr_multiple)
{
for (i = 0; i < 8; i++)
if (regs_ever_live[CR0_REGNO+i] && ! call_used_regs[CR0_REGNO+i])
gcc_assert (count);
}
- if (using_mfcr_multiple && count > 1)
+ if (using_mtcr_multiple && count > 1)
{
rtvec p;
int ndx;
}
\f
+
+/* The following variable value is the last issued insn. */
+
+static rtx last_scheduled_insn;
+
+/* The following variable helps to balance issuing of load and
+ store instructions */
+
+static int load_store_pendulum;
+
/* Power4 load update and store update instructions are cracked into a
load or store and an integer insn which are executed in the same cycle.
Branches have their own dispatch slot which does not count against the
int verbose ATTRIBUTE_UNUSED,
rtx insn, int more)
{
+ last_scheduled_insn = insn;
if (GET_CODE (PATTERN (insn)) == USE
|| GET_CODE (PATTERN (insn)) == CLOBBER)
+ {
+ cached_can_issue_more = more;
+ return cached_can_issue_more;
+ }
+
+ if (insn_terminates_group_p (insn, current_group))
+ {
+ cached_can_issue_more = 0;
+ return cached_can_issue_more;
+ }
+
+ /* If no reservation, but reach here */
+ if (recog_memoized (insn) < 0)
return more;
if (rs6000_sched_groups)
{
if (is_microcoded_insn (insn))
- return 0;
+ cached_can_issue_more = 0;
else if (is_cracked_insn (insn))
- return more > 2 ? more - 2 : 0;
+ cached_can_issue_more = more > 2 ? more - 2 : 0;
+ else
+ cached_can_issue_more = more - 1;
+
+ return cached_can_issue_more;
}
- return more - 1;
+ if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
+ return 0;
+
+ cached_can_issue_more = more - 1;
+ return cached_can_issue_more;
}
/* Adjust the cost of a scheduling dependency. Return the new cost of
static int
rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
+ enum attr_type attr_type;
+
if (! recog_memoized (insn))
return 0;
- if (REG_NOTE_KIND (link) != 0)
- return 0;
+ switch (REG_NOTE_KIND (link))
+ {
+ case REG_DEP_TRUE:
+ {
+ /* Data dependency; DEP_INSN writes a register that INSN reads
+ some cycles later. */
+
+ /* Separate a load from a narrower, dependent store. */
+ if (rs6000_sched_groups
+ && GET_CODE (PATTERN (insn)) == SET
+ && GET_CODE (PATTERN (dep_insn)) == SET
+ && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
+ && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
+ && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
+ > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
+ return cost + 14;
+
+ attr_type = get_attr_type (insn);
+
+ switch (attr_type)
+ {
+ case TYPE_JMPREG:
+ /* Tell the first scheduling pass about the latency between
+ a mtctr and bctr (and mtlr and br/blr). The first
+ scheduling pass will not know about this latency since
+ the mtctr instruction, which has the latency associated
+ to it, will be generated by reload. */
+ return TARGET_POWER ? 5 : 4;
+ case TYPE_BRANCH:
+ /* Leave some extra cycles between a compare and its
+ dependent branch, to inhibit expensive mispredicts. */
+ if ((rs6000_cpu_attr == CPU_PPC603
+ || rs6000_cpu_attr == CPU_PPC604
+ || rs6000_cpu_attr == CPU_PPC604E
+ || rs6000_cpu_attr == CPU_PPC620
+ || rs6000_cpu_attr == CPU_PPC630
+ || rs6000_cpu_attr == CPU_PPC750
+ || rs6000_cpu_attr == CPU_PPC7400
+ || rs6000_cpu_attr == CPU_PPC7450
+ || rs6000_cpu_attr == CPU_POWER4
+ || rs6000_cpu_attr == CPU_POWER5
+ || rs6000_cpu_attr == CPU_CELL)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0))
+
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_CMP:
+ case TYPE_COMPARE:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_FPCOMPARE:
+ case TYPE_CR_LOGICAL:
+ case TYPE_DELAYED_CR:
+ return cost + 2;
+ default:
+ break;
+ }
+ break;
+
+ case TYPE_STORE:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPSTORE:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ if ((rs6000_cpu == PROCESSOR_POWER6)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0))
+ {
+
+ if (GET_CODE (PATTERN (insn)) != SET)
+ /* If this happens, we have to extend this to schedule
+ optimally. Return default for now. */
+ return cost;
+
+ /* Adjust the cost for the case where the value written
+ by a fixed point operation is used as the address
+ gen value on a store. */
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_LOAD:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_CNTLZ:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 4;
+ break;
+ }
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_VAR_SHIFT_ROTATE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 6;
+ break;
+ }
+ case TYPE_INTEGER:
+ case TYPE_COMPARE:
+ case TYPE_FAST_COMPARE:
+ case TYPE_EXTS:
+ case TYPE_SHIFT:
+ case TYPE_INSERT_WORD:
+ case TYPE_INSERT_DWORD:
+ case TYPE_FPLOAD_U:
+ case TYPE_FPLOAD_UX:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 3;
+ break;
+ }
+ case TYPE_IMUL:
+ case TYPE_IMUL2:
+ case TYPE_IMUL3:
+ case TYPE_LMUL:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 17;
+ break;
+ }
+ case TYPE_IDIV:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 45;
+ break;
+ }
+ case TYPE_LDIV:
+ {
+ if (! store_data_bypass_p (dep_insn, insn))
+ return 57;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ break;
+
+ case TYPE_LOAD:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ if ((rs6000_cpu == PROCESSOR_POWER6)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0))
+ {
+
+ /* Adjust the cost for the case where the value written
+ by a fixed point instruction is used within the address
+ gen portion of a subsequent load(u)(x) */
+ switch (get_attr_type (dep_insn))
+ {
+ case TYPE_LOAD:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_CNTLZ:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 4;
+ break;
+ }
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_VAR_SHIFT_ROTATE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 6;
+ break;
+ }
+ case TYPE_INTEGER:
+ case TYPE_COMPARE:
+ case TYPE_FAST_COMPARE:
+ case TYPE_EXTS:
+ case TYPE_SHIFT:
+ case TYPE_INSERT_WORD:
+ case TYPE_INSERT_DWORD:
+ case TYPE_FPLOAD_U:
+ case TYPE_FPLOAD_UX:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 3;
+ break;
+ }
+ case TYPE_IMUL:
+ case TYPE_IMUL2:
+ case TYPE_IMUL3:
+ case TYPE_LMUL:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 17;
+ break;
+ }
+ case TYPE_IDIV:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 45;
+ break;
+ }
+ case TYPE_LDIV:
+ {
+ if (set_to_load_agen (dep_insn, insn))
+ return 57;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ break;
+
+ case TYPE_FPLOAD:
+ if ((rs6000_cpu == PROCESSOR_POWER6)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0)
+ && (get_attr_type (dep_insn) == TYPE_MFFGPR))
+ return 2;
+
+ default:
+ break;
+ }
- if (REG_NOTE_KIND (link) == 0)
- {
- /* Data dependency; DEP_INSN writes a register that INSN reads
- some cycles later. */
-
- /* Separate a load from a narrower, dependent store. */
- if (rs6000_sched_groups
- && GET_CODE (PATTERN (insn)) == SET
- && GET_CODE (PATTERN (dep_insn)) == SET
- && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
- && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
- && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
- > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
- return cost + 14;
-
- switch (get_attr_type (insn))
- {
- case TYPE_JMPREG:
- /* Tell the first scheduling pass about the latency between
- a mtctr and bctr (and mtlr and br/blr). The first
- scheduling pass will not know about this latency since
- the mtctr instruction, which has the latency associated
- to it, will be generated by reload. */
- return TARGET_POWER ? 5 : 4;
- case TYPE_BRANCH:
- /* Leave some extra cycles between a compare and its
- dependent branch, to inhibit expensive mispredicts. */
- if ((rs6000_cpu_attr == CPU_PPC603
- || rs6000_cpu_attr == CPU_PPC604
- || rs6000_cpu_attr == CPU_PPC604E
- || rs6000_cpu_attr == CPU_PPC620
- || rs6000_cpu_attr == CPU_PPC630
- || rs6000_cpu_attr == CPU_PPC750
- || rs6000_cpu_attr == CPU_PPC7400
- || rs6000_cpu_attr == CPU_PPC7450
- || rs6000_cpu_attr == CPU_POWER4
- || rs6000_cpu_attr == CPU_POWER5)
- && recog_memoized (dep_insn)
- && (INSN_CODE (dep_insn) >= 0)
- && (get_attr_type (dep_insn) == TYPE_CMP
- || get_attr_type (dep_insn) == TYPE_COMPARE
- || get_attr_type (dep_insn) == TYPE_DELAYED_COMPARE
- || get_attr_type (dep_insn) == TYPE_IMUL_COMPARE
- || get_attr_type (dep_insn) == TYPE_LMUL_COMPARE
- || get_attr_type (dep_insn) == TYPE_FPCOMPARE
- || get_attr_type (dep_insn) == TYPE_CR_LOGICAL
- || get_attr_type (dep_insn) == TYPE_DELAYED_CR))
- return cost + 2;
- default:
- break;
- }
/* Fall out to return default cost. */
+ }
+ break;
+
+ case REG_DEP_OUTPUT:
+ /* Output dependency; DEP_INSN writes a register that INSN writes some
+ cycles later. */
+ if ((rs6000_cpu == PROCESSOR_POWER6)
+ && recog_memoized (dep_insn)
+ && (INSN_CODE (dep_insn) >= 0))
+ {
+ attr_type = get_attr_type (insn);
+
+ switch (attr_type)
+ {
+ case TYPE_FP:
+ if (get_attr_type (dep_insn) == TYPE_FP)
+ return 1;
+ break;
+ case TYPE_FPLOAD:
+ if (get_attr_type (dep_insn) == TYPE_MFFGPR)
+ return 2;
+ break;
+ default:
+ break;
+ }
+ }
+ case REG_DEP_ANTI:
+ /* Anti dependency; DEP_INSN reads a register that INSN writes some
+ cycles later. */
+ return 0;
+
+ default:
+ gcc_unreachable ();
}
return cost;
|| GET_CODE (PATTERN (insn)) == CLOBBER)
return false;
+ if (rs6000_cpu_attr == CPU_CELL)
+ return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
+
if (rs6000_sched_groups)
{
enum attr_type type = get_attr_type (insn);
return false;
}
-/* The function returns a nonzero value if INSN can be scheduled only
- as the first insn in a dispatch group ("dispatch-slot restricted").
- In this case, the returned value indicates how many dispatch slots
- the insn occupies (at the beginning of the group).
- Return 0 otherwise. */
-
-static int
-is_dispatch_slot_restricted (rtx insn)
-{
- enum attr_type type;
-
- if (!rs6000_sched_groups)
- return 0;
-
- if (!insn
- || insn == NULL_RTX
- || GET_CODE (insn) == NOTE
- || GET_CODE (PATTERN (insn)) == USE
- || GET_CODE (PATTERN (insn)) == CLOBBER)
- return 0;
-
- type = get_attr_type (insn);
-
- switch (type)
- {
- case TYPE_MFCR:
- case TYPE_MFCRF:
- case TYPE_MTCR:
- case TYPE_DELAYED_CR:
- case TYPE_CR_LOGICAL:
- case TYPE_MTJMPR:
- case TYPE_MFJMPR:
- return 1;
- case TYPE_IDIV:
- case TYPE_LDIV:
- return 2;
- case TYPE_LOAD_L:
- case TYPE_STORE_C:
- case TYPE_ISYNC:
- case TYPE_SYNC:
- return 4;
- default:
- if (rs6000_cpu == PROCESSOR_POWER5
- && is_cracked_insn (insn))
- return 2;
- return 0;
- }
-}
-
/* The function returns true if INSN is cracked into 2 instructions
by the processor (and therefore occupies 2 issue slots). */
return false;
}
+/* The function returns true if out_inst sets a value that is
+ used in the address generation computation of in_insn */
+static bool
+set_to_load_agen (rtx out_insn, rtx in_insn)
+{
+ rtx out_set, in_set;
+
+ /* For performance reasons, only handle the simple case where
+ both loads are a single_set. */
+ out_set = single_set (out_insn);
+ if (out_set)
+ {
+ in_set = single_set (in_insn);
+ if (in_set)
+ return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
+ }
+
+ return false;
+}
+
+/* The function returns true if the target storage location of
+ out_insn is adjacent to the target storage location of in_insn */
+/* Return 1 if memory locations are adjacent. */
+
+static bool
+adjacent_mem_locations (rtx insn1, rtx insn2)
+{
+
+ rtx a = get_store_dest (PATTERN (insn1));
+ rtx b = get_store_dest (PATTERN (insn2));
+
+ if ((GET_CODE (XEXP (a, 0)) == REG
+ || (GET_CODE (XEXP (a, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
+ && (GET_CODE (XEXP (b, 0)) == REG
+ || (GET_CODE (XEXP (b, 0)) == PLUS
+ && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
+ {
+ HOST_WIDE_INT val0 = 0, val1 = 0;
+ rtx reg0, reg1;
+ int val_diff;
+
+ if (GET_CODE (XEXP (a, 0)) == PLUS)
+ {
+ reg0 = XEXP (XEXP (a, 0), 0);
+ val0 = INTVAL (XEXP (XEXP (a, 0), 1));
+ }
+ else
+ reg0 = XEXP (a, 0);
+
+ if (GET_CODE (XEXP (b, 0)) == PLUS)
+ {
+ reg1 = XEXP (XEXP (b, 0), 0);
+ val1 = INTVAL (XEXP (XEXP (b, 0), 1));
+ }
+ else
+ reg1 = XEXP (b, 0);
+
+ val_diff = val1 - val0;
+
+ return ((REGNO (reg0) == REGNO (reg1))
+ && (val_diff == INTVAL (MEM_SIZE (a))
+ || val_diff == -INTVAL (MEM_SIZE (b))));
+ }
+
+ return false;
+}
+
/* A C statement (sans semicolon) to update the integer scheduling
priority INSN_PRIORITY (INSN). Increase the priority to execute the
INSN earlier, reduce the priority to execute INSN later. Do not
}
#endif
- if (is_dispatch_slot_restricted (insn)
+ if (insn_must_be_first_in_group (insn)
&& reload_completed
&& current_sched_info->sched_max_insns_priority
&& rs6000_sched_restricted_insns_priority)
return (priority + 1);
}
+ if (rs6000_cpu == PROCESSOR_POWER6
+ && ((load_store_pendulum == -2 && is_load_insn (insn))
+ || (load_store_pendulum == 2 && is_store_insn (insn))))
+ /* Attach highest priority to insn if the scheduler has just issued two
+ stores and this instruction is a load, or two loads and this instruction
+ is a store. Power6 wants loads and stores scheduled alternately
+ when possible */
+ return current_sched_info->sched_max_insns_priority;
+
return priority;
}
+/* Return true if the instruction is nonpipelined on the Cell. */
+static bool
+is_nonpipeline_insn (rtx insn)
+{
+ enum attr_type type;
+ if (!insn || !INSN_P (insn)
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ type = get_attr_type (insn);
+ if (type == TYPE_IMUL
+ || type == TYPE_IMUL2
+ || type == TYPE_IMUL3
+ || type == TYPE_LMUL
+ || type == TYPE_IDIV
+ || type == TYPE_LDIV
+ || type == TYPE_SDIV
+ || type == TYPE_DDIV
+ || type == TYPE_SSQRT
+ || type == TYPE_DSQRT
+ || type == TYPE_MFCR
+ || type == TYPE_MFCRF
+ || type == TYPE_MFJMPR)
+ {
+ return true;
+ }
+ return false;
+}
+
+
/* Return how many instructions the machine can issue per cycle. */
static int
case CPU_PPC750:
case CPU_PPC7400:
case CPU_PPC8540:
+ case CPU_CELL:
return 2;
case CPU_RIOS2:
case CPU_PPC604:
return 4;
case CPU_POWER4:
case CPU_POWER5:
+ case CPU_POWER6:
return 5;
default:
return 1;
{
if (rs6000_cpu_attr == CPU_PPC8540)
return 4;
+ if (rs6000_cpu_attr == CPU_CELL)
+ return (reload_completed ? 8 : 0);
return 0;
}
+/* We are choosing insn from the ready queue. Return nonzero if INSN can be chosen. */
+static int
+rs6000_use_sched_lookahead_guard (rtx insn)
+{
+ if (rs6000_cpu_attr != CPU_CELL)
+ return 1;
+
+ if (insn == NULL_RTX || !INSN_P (insn))
+ abort ();
+
+ if (!reload_completed
+ || is_nonpipeline_insn (insn)
+ || is_microcoded_insn (insn))
+ return 0;
+
+ return 1;
+}
+
/* Determine is PAT refers to memory. */
static bool
return is_store_insn1 (PATTERN (insn));
}
+/* Return the dest of a store insn. */
+
+static rtx
+get_store_dest (rtx pat)
+{
+ gcc_assert (is_store_insn1 (pat));
+
+ if (GET_CODE (pat) == SET)
+ return SET_DEST (pat);
+ else if (GET_CODE (pat) == PARALLEL)
+ {
+ int i;
+
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ {
+ rtx inner_pat = XVECEXP (pat, 0, i);
+ if (GET_CODE (inner_pat) == SET
+ && is_mem_ref (SET_DEST (inner_pat)))
+ return inner_pat;
+ }
+ }
+ /* We shouldn't get here, because we should have either a simple
+ store insn or a store with update which are covered above. */
+ gcc_unreachable();
+}
+
/* Returns whether the dependence between INSN and NEXT is considered
costly by the given target. */
static bool
-rs6000_is_costly_dependence (rtx insn, rtx next, rtx link, int cost,
- int distance)
+rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
{
+ rtx insn;
+ rtx next;
+
/* If the flag is not enabled - no dependence is considered costly;
allow all dependent insns in the same group.
This is the most aggressive option. */
if (rs6000_sched_costly_dep == all_deps_costly)
return true;
+ insn = DEP_PRO (dep);
+ next = DEP_CON (dep);
+
if (rs6000_sched_costly_dep == store_to_load_dep_costly
&& is_load_insn (next)
&& is_store_insn (insn))
if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
&& is_load_insn (next)
&& is_store_insn (insn)
- && (!link || (int) REG_NOTE_KIND (link) == 0))
+ && DEP_KIND (dep) == REG_DEP_TRUE)
/* Prevent load after store in the same group if it is a true
dependence. */
return true;
return insn;
}
+/* We are about to begin issuing insns for this clock cycle. */
+
+static int
+rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
+ rtx *ready ATTRIBUTE_UNUSED,
+ int *pn_ready ATTRIBUTE_UNUSED,
+ int clock_var ATTRIBUTE_UNUSED)
+{
+ int n_ready = *pn_ready;
+
+ if (sched_verbose)
+ fprintf (dump, "// rs6000_sched_reorder :\n");
+
+ /* Reorder the ready list, if the second to last ready insn
+ is a nonepipeline insn. */
+ if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
+ {
+ if (is_nonpipeline_insn (ready[n_ready - 1])
+ && (recog_memoized (ready[n_ready - 2]) > 0))
+ /* Simply swap first two insns. */
+ {
+ rtx tmp = ready[n_ready - 1];
+ ready[n_ready - 1] = ready[n_ready - 2];
+ ready[n_ready - 2] = tmp;
+ }
+ }
+
+ if (rs6000_cpu == PROCESSOR_POWER6)
+ load_store_pendulum = 0;
+
+ return rs6000_issue_rate ();
+}
+
+/* Like rs6000_sched_reorder, but called after issuing each insn. */
+
+static int
+rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
+ int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
+{
+ if (sched_verbose)
+ fprintf (dump, "// rs6000_sched_reorder2 :\n");
+
+ /* For Power6, we need to handle some special cases to try and keep the
+ store queue from overflowing and triggering expensive flushes.
+
+ This code monitors how load and store instructions are being issued
+ and skews the ready list one way or the other to increase the likelihood
+ that a desired instruction is issued at the proper time.
+
+ A couple of things are done. First, we maintain a "load_store_pendulum"
+ to track the current state of load/store issue.
+
+ - If the pendulum is at zero, then no loads or stores have been
+ issued in the current cycle so we do nothing.
+
+ - If the pendulum is 1, then a single load has been issued in this
+ cycle and we attempt to locate another load in the ready list to
+ issue with it.
+
+ - If the pendulum is -2, then two stores have already been
+ issued in this cycle, so we increase the priority of the first load
+ in the ready list to increase it's likelihood of being chosen first
+ in the next cycle.
+
+ - If the pendulum is -1, then a single store has been issued in this
+ cycle and we attempt to locate another store in the ready list to
+ issue with it, preferring a store to an adjacent memory location to
+ facilitate store pairing in the store queue.
+
+ - If the pendulum is 2, then two loads have already been
+ issued in this cycle, so we increase the priority of the first store
+ in the ready list to increase it's likelihood of being chosen first
+ in the next cycle.
+
+ - If the pendulum < -2 or > 2, then do nothing.
+
+ Note: This code covers the most common scenarios. There exist non
+ load/store instructions which make use of the LSU and which
+ would need to be accounted for to strictly model the behavior
+ of the machine. Those instructions are currently unaccounted
+ for to help minimize compile time overhead of this code.
+ */
+ if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
+ {
+ int pos;
+ int i;
+ rtx tmp;
+
+ if (is_store_insn (last_scheduled_insn))
+ /* Issuing a store, swing the load_store_pendulum to the left */
+ load_store_pendulum--;
+ else if (is_load_insn (last_scheduled_insn))
+ /* Issuing a load, swing the load_store_pendulum to the right */
+ load_store_pendulum++;
+ else
+ return cached_can_issue_more;
+
+ /* If the pendulum is balanced, or there is only one instruction on
+ the ready list, then all is well, so return. */
+ if ((load_store_pendulum == 0) || (*pn_ready <= 1))
+ return cached_can_issue_more;
+
+ if (load_store_pendulum == 1)
+ {
+ /* A load has been issued in this cycle. Scan the ready list
+ for another load to issue with it */
+ pos = *pn_ready-1;
+
+ while (pos >= 0)
+ {
+ if (is_load_insn (ready[pos]))
+ {
+ /* Found a load. Move it to the head of the ready list,
+ and adjust it's priority so that it is more likely to
+ stay there */
+ tmp = ready[pos];
+ for (i=pos; i<*pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ if INSN_PRIORITY_KNOWN (tmp)
+ INSN_PRIORITY (tmp)++;
+ break;
+ }
+ pos--;
+ }
+ }
+ else if (load_store_pendulum == -2)
+ {
+ /* Two stores have been issued in this cycle. Increase the
+ priority of the first load in the ready list to favor it for
+ issuing in the next cycle. */
+ pos = *pn_ready-1;
+
+ while (pos >= 0)
+ {
+ if (is_load_insn (ready[pos])
+ && INSN_PRIORITY_KNOWN (ready[pos]))
+ {
+ INSN_PRIORITY (ready[pos])++;
+
+ /* Adjust the pendulum to account for the fact that a load
+ was found and increased in priority. This is to prevent
+ increasing the priority of multiple loads */
+ load_store_pendulum--;
+
+ break;
+ }
+ pos--;
+ }
+ }
+ else if (load_store_pendulum == -1)
+ {
+ /* A store has been issued in this cycle. Scan the ready list for
+ another store to issue with it, preferring a store to an adjacent
+ memory location */
+ int first_store_pos = -1;
+
+ pos = *pn_ready-1;
+
+ while (pos >= 0)
+ {
+ if (is_store_insn (ready[pos]))
+ {
+ /* Maintain the index of the first store found on the
+ list */
+ if (first_store_pos == -1)
+ first_store_pos = pos;
+
+ if (is_store_insn (last_scheduled_insn)
+ && adjacent_mem_locations (last_scheduled_insn,ready[pos]))
+ {
+ /* Found an adjacent store. Move it to the head of the
+ ready list, and adjust it's priority so that it is
+ more likely to stay there */
+ tmp = ready[pos];
+ for (i=pos; i<*pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ if INSN_PRIORITY_KNOWN (tmp)
+ INSN_PRIORITY (tmp)++;
+ first_store_pos = -1;
+
+ break;
+ };
+ }
+ pos--;
+ }
+
+ if (first_store_pos >= 0)
+ {
+ /* An adjacent store wasn't found, but a non-adjacent store was,
+ so move the non-adjacent store to the front of the ready
+ list, and adjust its priority so that it is more likely to
+ stay there. */
+ tmp = ready[first_store_pos];
+ for (i=first_store_pos; i<*pn_ready-1; i++)
+ ready[i] = ready[i + 1];
+ ready[*pn_ready-1] = tmp;
+ if INSN_PRIORITY_KNOWN (tmp)
+ INSN_PRIORITY (tmp)++;
+ }
+ }
+ else if (load_store_pendulum == 2)
+ {
+ /* Two loads have been issued in this cycle. Increase the priority
+ of the first store in the ready list to favor it for issuing in
+ the next cycle. */
+ pos = *pn_ready-1;
+
+ while (pos >= 0)
+ {
+ if (is_store_insn (ready[pos])
+ && INSN_PRIORITY_KNOWN (ready[pos]))
+ {
+ INSN_PRIORITY (ready[pos])++;
+
+ /* Adjust the pendulum to account for the fact that a store
+ was found and increased in priority. This is to prevent
+ increasing the priority of multiple stores */
+ load_store_pendulum++;
+
+ break;
+ }
+ pos--;
+ }
+ }
+ }
+
+ return cached_can_issue_more;
+}
+
/* Return whether the presence of INSN causes a dispatch group termination
of group WHICH_GROUP.
static bool
insn_terminates_group_p (rtx insn, enum group_termination which_group)
{
- enum attr_type type;
+ bool first, last;
if (! insn)
return false;
- type = get_attr_type (insn);
+ first = insn_must_be_first_in_group (insn);
+ last = insn_must_be_last_in_group (insn);
- if (is_microcoded_insn (insn))
+ if (first && last)
return true;
if (which_group == current_group)
- {
- if (is_branch_slot_insn (insn))
- return true;
- return false;
- }
+ return last;
else if (which_group == previous_group)
+ return first;
+
+ return false;
+}
+
+
+static bool
+insn_must_be_first_in_group (rtx insn)
+{
+ enum attr_type type;
+
+ if (!insn
+ || insn == NULL_RTX
+ || GET_CODE (insn) == NOTE
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ switch (rs6000_cpu)
{
- if (is_dispatch_slot_restricted (insn))
- return true;
- return false;
+ case PROCESSOR_POWER5:
+ if (is_cracked_insn (insn))
+ return true;
+ case PROCESSOR_POWER4:
+ if (is_microcoded_insn (insn))
+ return true;
+
+ if (!rs6000_sched_groups)
+ return false;
+
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_MFCR:
+ case TYPE_MFCRF:
+ case TYPE_MTCR:
+ case TYPE_DELAYED_CR:
+ case TYPE_CR_LOGICAL:
+ case TYPE_MTJMPR:
+ case TYPE_MFJMPR:
+ case TYPE_IDIV:
+ case TYPE_LDIV:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ return true;
+ default:
+ break;
+ }
+ break;
+ case PROCESSOR_POWER6:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_INSERT_DWORD:
+ case TYPE_EXTS:
+ case TYPE_CNTLZ:
+ case TYPE_SHIFT:
+ case TYPE_VAR_SHIFT_ROTATE:
+ case TYPE_TRAP:
+ case TYPE_IMUL:
+ case TYPE_IMUL2:
+ case TYPE_IMUL3:
+ case TYPE_LMUL:
+ case TYPE_IDIV:
+ case TYPE_INSERT_WORD:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_FPCOMPARE:
+ case TYPE_MFCR:
+ case TYPE_MTCR:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_U:
+ case TYPE_STORE_UX:
+ case TYPE_FPLOAD_U:
+ case TYPE_FPLOAD_UX:
+ case TYPE_FPSTORE_U:
+ case TYPE_FPSTORE_UX:
+ return true;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return false;
+}
+
+static bool
+insn_must_be_last_in_group (rtx insn)
+{
+ enum attr_type type;
+
+ if (!insn
+ || insn == NULL_RTX
+ || GET_CODE (insn) == NOTE
+ || GET_CODE (PATTERN (insn)) == USE
+ || GET_CODE (PATTERN (insn)) == CLOBBER)
+ return false;
+
+ switch (rs6000_cpu) {
+ case PROCESSOR_POWER4:
+ case PROCESSOR_POWER5:
+ if (is_microcoded_insn (insn))
+ return true;
+
+ if (is_branch_slot_insn (insn))
+ return true;
+
+ break;
+ case PROCESSOR_POWER6:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_EXTS:
+ case TYPE_CNTLZ:
+ case TYPE_SHIFT:
+ case TYPE_VAR_SHIFT_ROTATE:
+ case TYPE_TRAP:
+ case TYPE_IMUL:
+ case TYPE_IMUL2:
+ case TYPE_IMUL3:
+ case TYPE_LMUL:
+ case TYPE_IDIV:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_FPCOMPARE:
+ case TYPE_MFCR:
+ case TYPE_MTCR:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ return true;
+ default:
+ break;
}
+ break;
+ default:
+ break;
+ }
return false;
}
is_costly_group (rtx *group_insns, rtx next_insn)
{
int i;
- rtx link;
- int cost;
int issue_rate = rs6000_issue_rate ();
for (i = 0; i < issue_rate; i++)
{
+ dep_link_t link;
rtx insn = group_insns[i];
+
if (!insn)
continue;
- for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1))
+
+ FOR_EACH_DEP_LINK (link, INSN_FORW_DEPS (insn))
{
- rtx next = XEXP (link, 0);
- if (next == next_insn)
- {
- cost = insn_cost (insn, link, next_insn);
- if (rs6000_is_costly_dependence (insn, next_insn, link, cost, 0))
- return true;
- }
+ dep_t dep = DEP_LINK_DEP (link);
+ rtx next = DEP_CON (dep);
+
+ if (next == next_insn
+ && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
+ return true;
}
}
return group_count;
}
+/* We're beginning a new block. Initialize data structures as necessary. */
+
+static void
+rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
+ int sched_verbose ATTRIBUTE_UNUSED,
+ int max_ready ATTRIBUTE_UNUSED)
+{
+ last_scheduled_insn = NULL_RTX;
+ load_store_pendulum = 0;
+}
+
/* The following function is called at the end of scheduling BB.
After reload, it inserts nops at insn group bundling. */
return 0;
}
+#ifndef DARWIN_LINKER_GENERATES_ISLANDS
+#define DARWIN_LINKER_GENERATES_ISLANDS 0
+#endif
+
+/* KEXTs still need branch islands. */
+#define DARWIN_GENERATE_ISLANDS (!DARWIN_LINKER_GENERATES_ISLANDS \
+ || flag_mkernel || flag_apple_kext)
+
/* INSN is either a function call or a millicode call. It may have an
unconditional jump in its delay slot.
int cookie_operand_number)
{
static char buf[256];
- if (GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
+ if (DARWIN_GENERATE_ISLANDS
+ && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
&& (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
{
tree labelname;
case MINUS:
if (mode == DFmode)
{
- if (GET_CODE (XEXP (x, 0)) == MULT)
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ || GET_CODE (XEXP (x, 1)) == MULT)
{
/* FNMA accounted in outer NEG. */
if (outer_code == NEG)
- *total = 0;
+ *total = rs6000_cost->dmul - rs6000_cost->fp;
else
*total = rs6000_cost->dmul;
}
gen_rtx_MULT (DFmode, v0, y3), u0)));
}
+
+/* Emit popcount intrinsic on TARGET_POPCNTB targets. DST is the
+ target, and SRC is the argument operand. */
+
+void
+rs6000_emit_popcount (rtx dst, rtx src)
+{
+ enum machine_mode mode = GET_MODE (dst);
+ rtx tmp1, tmp2;
+
+ tmp1 = gen_reg_rtx (mode);
+
+ if (mode == SImode)
+ {
+ emit_insn (gen_popcntbsi2 (tmp1, src));
+ tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
+ NULL_RTX, 0);
+ tmp2 = force_reg (SImode, tmp2);
+ emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
+ }
+ else
+ {
+ emit_insn (gen_popcntbdi2 (tmp1, src));
+ tmp2 = expand_mult (DImode, tmp1,
+ GEN_INT ((HOST_WIDE_INT)
+ 0x01010101 << 32 | 0x01010101),
+ NULL_RTX, 0);
+ tmp2 = force_reg (DImode, tmp2);
+ emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
+ }
+}
+
+
+/* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
+ target, and SRC is the argument operand. */
+
+void
+rs6000_emit_parity (rtx dst, rtx src)
+{
+ enum machine_mode mode = GET_MODE (dst);
+ rtx tmp;
+
+ tmp = gen_reg_rtx (mode);
+ if (mode == SImode)
+ {
+ /* Is mult+shift >= shift+xor+shift+xor? */
+ if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
+ {
+ rtx tmp1, tmp2, tmp3, tmp4;
+
+ tmp1 = gen_reg_rtx (SImode);
+ emit_insn (gen_popcntbsi2 (tmp1, src));
+
+ tmp2 = gen_reg_rtx (SImode);
+ emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
+ tmp3 = gen_reg_rtx (SImode);
+ emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
+
+ tmp4 = gen_reg_rtx (SImode);
+ emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
+ emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
+ }
+ else
+ rs6000_emit_popcount (tmp, src);
+ emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
+ }
+ else
+ {
+ /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
+ if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
+ {
+ rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
+
+ tmp1 = gen_reg_rtx (DImode);
+ emit_insn (gen_popcntbdi2 (tmp1, src));
+
+ tmp2 = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
+ tmp3 = gen_reg_rtx (DImode);
+ emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
+
+ tmp4 = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
+ tmp5 = gen_reg_rtx (DImode);
+ emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
+
+ tmp6 = gen_reg_rtx (DImode);
+ emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
+ emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
+ }
+ else
+ rs6000_emit_popcount (tmp, src);
+ emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
+ }
+}
+
/* Return an RTX representing where to find the function value of a
function returning MODE. */
static rtx
&& ALTIVEC_VECTOR_MODE (mode))
regno = ALTIVEC_ARG_RETURN;
else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
- && (mode == DFmode || mode == DCmode))
+ && (mode == DFmode || mode == DCmode
+ || mode == TFmode || mode == TCmode))
return spe_build_register_parallel (mode, GP_ARG_RETURN);
else
regno = GP_ARG_RETURN;
else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
return rs6000_complex_function_value (mode);
else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
- && (mode == DFmode || mode == DCmode))
+ && (mode == DFmode || mode == DCmode
+ || mode == TFmode || mode == TCmode))
return spe_build_register_parallel (mode, GP_ARG_RETURN);
else
regno = GP_ARG_RETURN;