X-Git-Url: http://git.sourceforge.jp/view?p=pf3gnuchains%2Fgcc-fork.git;a=blobdiff_plain;f=gcc%2Fconfig%2Frs6000%2Frs6000.c;h=5389c318cf4d8eba8bf85445f023607b81286a3e;hp=d8d0db10b7c6b5085179c8dfc4fa4d3eb781ecc5;hb=c2f47e150f3c68a813f92460462c2e70155f2c67;hpb=aa8b5d0754cb919f0a671cc4124343b5fda80079 diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index d8d0db10b7c..5389c318cf4 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -1,6 +1,6 @@ /* Subroutines used for code generation on IBM RS/6000. Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, - 2000, 2001, 2002, 2003, 2004, 2005, 2006 + 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc. Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) @@ -139,12 +139,17 @@ struct rs6000_cpu_select rs6000_select[3] = { (const char *)0, "-mtune=", 1, 0 }, }; +static GTY(()) bool rs6000_cell_dont_microcode; + /* Always emit branch hint bits. */ static GTY(()) bool rs6000_always_hint; /* Schedule instructions for group formation. */ static GTY(()) bool rs6000_sched_groups; +/* Align branch targets. */ +static GTY(()) bool rs6000_align_branch_targets; + /* Support for -msched-costly-dep option. */ const char *rs6000_sched_costly_dep_str; enum rs6000_dependence_cost rs6000_sched_costly_dep; @@ -235,6 +240,10 @@ static enum { int toc_initialized; char toc_label_name[10]; +/* Cached value of rs6000_variable_issue. This is cached in + rs6000_variable_issue hook and returned from rs6000_sched_reorder2. */ +static short cached_can_issue_more; + static GTY(()) section *read_only_data_section; static GTY(()) section *private_data_section; static GTY(()) section *read_only_private_data_section; @@ -512,6 +521,22 @@ struct processor_costs ppc630_cost = { COSTS_N_INSNS (21), /* ddiv */ }; +/* Instruction costs on Cell processor. */ +/* COSTS_N_INSNS (1) ~ one add. */ +static const +struct processor_costs ppccell_cost = { + COSTS_N_INSNS (9/2)+2, /* mulsi */ + COSTS_N_INSNS (6/2), /* mulsi_const */ + COSTS_N_INSNS (6/2), /* mulsi_const9 */ + COSTS_N_INSNS (15/2)+2, /* muldi */ + COSTS_N_INSNS (38/2), /* divsi */ + COSTS_N_INSNS (70/2), /* divdi */ + COSTS_N_INSNS (10/2), /* fp */ + COSTS_N_INSNS (10/2), /* dmul */ + COSTS_N_INSNS (74/2), /* sdiv */ + COSTS_N_INSNS (74/2), /* ddiv */ +}; + /* Instruction costs on PPC750 and PPC7400 processors. */ static const struct processor_costs ppc750_cost = { @@ -572,6 +597,21 @@ struct processor_costs power4_cost = { COSTS_N_INSNS (17), /* ddiv */ }; +/* Instruction costs on POWER6 processors. 
*/ +static const +struct processor_costs power6_cost = { + COSTS_N_INSNS (8), /* mulsi */ + COSTS_N_INSNS (8), /* mulsi_const */ + COSTS_N_INSNS (8), /* mulsi_const9 */ + COSTS_N_INSNS (8), /* muldi */ + COSTS_N_INSNS (22), /* divsi */ + COSTS_N_INSNS (28), /* divdi */ + COSTS_N_INSNS (3), /* fp */ + COSTS_N_INSNS (3), /* dmul */ + COSTS_N_INSNS (13), /* sdiv */ + COSTS_N_INSNS (16), /* ddiv */ +}; + static bool rs6000_function_ok_for_sibcall (tree, tree); static const char *rs6000_invalid_within_doloop (rtx); @@ -647,22 +687,36 @@ static void rs6000_xcoff_file_end (void); static int rs6000_variable_issue (FILE *, int, rtx, int); static bool rs6000_rtx_costs (rtx, int, int, int *); static int rs6000_adjust_cost (rtx, rtx, rtx, int); +static void rs6000_sched_init (FILE *, int, int); static bool is_microcoded_insn (rtx); -static int is_dispatch_slot_restricted (rtx); +static bool is_nonpipeline_insn (rtx); static bool is_cracked_insn (rtx); static bool is_branch_slot_insn (rtx); +static bool is_load_insn (rtx); +static rtx get_store_dest (rtx pat); +static bool is_store_insn (rtx); +static bool set_to_load_agen (rtx,rtx); +static bool adjacent_mem_locations (rtx,rtx); static int rs6000_adjust_priority (rtx, int); static int rs6000_issue_rate (void); -static bool rs6000_is_costly_dependence (rtx, rtx, rtx, int, int); +static bool rs6000_is_costly_dependence (dep_t, int, int); static rtx get_next_active_insn (rtx, rtx); static bool insn_terminates_group_p (rtx , enum group_termination); +static bool insn_must_be_first_in_group (rtx); +static bool insn_must_be_last_in_group (rtx); static bool is_costly_group (rtx *, rtx); static int force_new_group (int, FILE *, rtx *, rtx, bool *, int, int *); static int redefine_groups (FILE *, int, rtx, rtx); static int pad_groups (FILE *, int, rtx, rtx); static void rs6000_sched_finish (FILE *, int); +static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int); +static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int); static int rs6000_use_sched_lookahead (void); +static int rs6000_use_sched_lookahead_guard (rtx); static tree rs6000_builtin_mask_for_load (void); +static tree rs6000_builtin_mul_widen_even (tree); +static tree rs6000_builtin_mul_widen_odd (tree); +static tree rs6000_builtin_conversion (enum tree_code, tree); static void def_builtin (int, const char *, tree, int); static void rs6000_init_builtins (void); @@ -744,7 +798,6 @@ static int rs6000_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, static const char *invalid_arg_for_unprototyped_fn (tree, tree, tree); #if TARGET_MACHO static void macho_branch_islands (void); -static void add_compiler_branch_island (tree, tree, int); static int no_previous_def (tree function_name); static tree get_prev_label (tree function_name); static void rs6000_darwin_file_start (void); @@ -908,14 +961,29 @@ static const char alt_reg_names[][8] = #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence +#undef TARGET_SCHED_INIT +#define TARGET_SCHED_INIT rs6000_sched_init #undef TARGET_SCHED_FINISH #define TARGET_SCHED_FINISH rs6000_sched_finish +#undef TARGET_SCHED_REORDER +#define TARGET_SCHED_REORDER rs6000_sched_reorder +#undef TARGET_SCHED_REORDER2 +#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead +#undef 
TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard + #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load +#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN +#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN rs6000_builtin_mul_widen_even +#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD +#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD rs6000_builtin_mul_widen_odd +#undef TARGET_VECTORIZE_BUILTIN_CONVERSION +#define TARGET_VECTORIZE_BUILTIN_CONVERSION rs6000_builtin_conversion #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS rs6000_init_builtins @@ -1169,12 +1237,16 @@ rs6000_override_options (const char *default_cpu) {"801", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT}, {"821", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT}, {"823", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT}, - {"8540", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_PPC_GFXOPT}, + {"8540", PROCESSOR_PPC8540, + POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_STRICT_ALIGN}, /* 8548 has a dummy entry for now. */ - {"8548", PROCESSOR_PPC8540, POWERPC_BASE_MASK | MASK_PPC_GFXOPT}, + {"8548", PROCESSOR_PPC8540, + POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_STRICT_ALIGN}, {"860", PROCESSOR_MPCCORE, POWERPC_BASE_MASK | MASK_SOFT_FLOAT}, {"970", PROCESSOR_POWER4, POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64}, + {"cell", PROCESSOR_CELL, + POWERPC_7400_MASK | MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64}, {"common", PROCESSOR_COMMON, MASK_NEW_MNEMONICS}, {"ec603e", PROCESSOR_PPC603, POWERPC_BASE_MASK | MASK_SOFT_FLOAT}, {"G3", PROCESSOR_PPC750, POWERPC_BASE_MASK | MASK_PPC_GFXOPT}, @@ -1194,9 +1266,12 @@ rs6000_override_options (const char *default_cpu) {"power5+", PROCESSOR_POWER5, POWERPC_BASE_MASK | MASK_POWERPC64 | MASK_PPC_GFXOPT | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND}, - {"power6", PROCESSOR_POWER5, + {"power6", PROCESSOR_POWER6, POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND}, + {"power6x", PROCESSOR_POWER6, + POWERPC_7400_MASK | MASK_POWERPC64 | MASK_MFCRF | MASK_POPCNTB + | MASK_FPRND | MASK_MFPGPR}, {"powerpc", PROCESSOR_POWERPC, POWERPC_BASE_MASK}, {"powerpc64", PROCESSOR_POWERPC64, POWERPC_BASE_MASK | MASK_PPC_GFXOPT | MASK_POWERPC64}, @@ -1220,10 +1295,10 @@ rs6000_override_options (const char *default_cpu) enum { POWER_MASKS = MASK_POWER | MASK_POWER2 | MASK_MULTIPLE | MASK_STRING, - POWERPC_MASKS = (POWERPC_BASE_MASK | MASK_PPC_GPOPT + POWERPC_MASKS = (POWERPC_BASE_MASK | MASK_PPC_GPOPT | MASK_STRICT_ALIGN | MASK_PPC_GFXOPT | MASK_POWERPC64 | MASK_ALTIVEC | MASK_MFCRF | MASK_POPCNTB | MASK_FPRND | MASK_MULHW - | MASK_DLMZB) + | MASK_DLMZB | MASK_MFPGPR) }; rs6000_init_hard_regno_mode_ok (); @@ -1373,9 +1448,6 @@ rs6000_override_options (const char *default_cpu) if (TARGET_E500) { - if (TARGET_ALTIVEC) - error ("AltiVec and E500 instructions cannot coexist"); - /* The e500 does not have string instructions, and we set MASK_STRING above when optimizing for size. */ if ((target_flags & MASK_STRING) != 0) @@ -1394,14 +1466,20 @@ rs6000_override_options (const char *default_cpu) rs6000_float_gprs = 0; if (!rs6000_explicit_options.isel) rs6000_isel = 0; - if (!rs6000_explicit_options.long_double) - rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE; } + /* Detect invalid option combinations with E500. 
*/ + CHECK_E500_OPTIONS; + rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4 - && rs6000_cpu != PROCESSOR_POWER5); + && rs6000_cpu != PROCESSOR_POWER5 + && rs6000_cpu != PROCESSOR_POWER6 + && rs6000_cpu != PROCESSOR_CELL); rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5); + rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4 + || rs6000_cpu == PROCESSOR_POWER5 + || rs6000_cpu == PROCESSOR_POWER6); rs6000_sched_restricted_insns_priority = (rs6000_sched_groups ? 1 : 0); @@ -1470,7 +1548,17 @@ rs6000_override_options (const char *default_cpu) /* Set branch target alignment, if not optimizing for size. */ if (!optimize_size) { - if (rs6000_sched_groups) + /* Cell wants to be aligned 8byte for dual issue. */ + if (rs6000_cpu == PROCESSOR_CELL) + { + if (align_functions <= 0) + align_functions = 8; + if (align_jumps <= 0) + align_jumps = 8; + if (align_loops <= 0) + align_loops = 8; + } + if (rs6000_align_branch_targets) { if (align_functions <= 0) align_functions = 16; @@ -1551,6 +1639,10 @@ rs6000_override_options (const char *default_cpu) rs6000_cost = &ppc630_cost; break; + case PROCESSOR_CELL: + rs6000_cost = &ppccell_cost; + break; + case PROCESSOR_PPC750: case PROCESSOR_PPC7400: rs6000_cost = &ppc750_cost; @@ -1569,6 +1661,10 @@ rs6000_override_options (const char *default_cpu) rs6000_cost = &power4_cost; break; + case PROCESSOR_POWER6: + rs6000_cost = &power6_cost; + break; + default: gcc_unreachable (); } @@ -1584,6 +1680,76 @@ rs6000_builtin_mask_for_load (void) return 0; } +/* Implement targetm.vectorize.builtin_conversion. */ +static tree +rs6000_builtin_conversion (enum tree_code code, tree type) +{ + if (!TARGET_ALTIVEC) + return NULL_TREE; + + switch (code) + { + case FLOAT_EXPR: + switch (TYPE_MODE (type)) + { + case V4SImode: + return TYPE_UNSIGNED (type) ? + rs6000_builtin_decls[ALTIVEC_BUILTIN_VCFUX] : + rs6000_builtin_decls[ALTIVEC_BUILTIN_VCFSX]; + default: + return NULL_TREE; + } + default: + return NULL_TREE; + } +} + +/* Implement targetm.vectorize.builtin_mul_widen_even. */ +static tree +rs6000_builtin_mul_widen_even (tree type) +{ + if (!TARGET_ALTIVEC) + return NULL_TREE; + + switch (TYPE_MODE (type)) + { + case V8HImode: + return TYPE_UNSIGNED (type) ? + rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUH] : + rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESH]; + + case V16QImode: + return TYPE_UNSIGNED (type) ? + rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUB] : + rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESB]; + default: + return NULL_TREE; + } +} + +/* Implement targetm.vectorize.builtin_mul_widen_odd. */ +static tree +rs6000_builtin_mul_widen_odd (tree type) +{ + if (!TARGET_ALTIVEC) + return NULL_TREE; + + switch (TYPE_MODE (type)) + { + case V8HImode: + return TYPE_UNSIGNED (type) ? + rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUH] : + rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSH]; + + case V16QImode: + return TYPE_UNSIGNED (type) ? + rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUB] : + rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSB]; + default: + return NULL_TREE; + } +} + /* Handle generic options of the form -mfoo=yes/no. NAME is the option name. VALUE is the option value. @@ -1750,9 +1916,6 @@ rs6000_handle_option (size_t code, const char *arg, int value) case OPT_mspe_: rs6000_explicit_options.spe = true; rs6000_parse_yes_no_option ("spe", arg, &(rs6000_spe)); - /* No SPE means 64-bit long doubles, even if an E500. 
*/ - if (!rs6000_spe) - rs6000_long_double_type_size = 64; break; case OPT_mdebug_: @@ -2574,24 +2737,38 @@ build_mask64_2_operands (rtx in, rtx *out) bool invalid_e500_subreg (rtx op, enum machine_mode mode) { - /* Reject (subreg:SI (reg:DF)). */ - if (GET_CODE (op) == SUBREG - && mode == SImode - && REG_P (SUBREG_REG (op)) - && GET_MODE (SUBREG_REG (op)) == DFmode) - return true; + if (TARGET_E500_DOUBLE) + { + /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or + subreg:TI and reg:TF. */ + if (GET_CODE (op) == SUBREG + && (mode == SImode || mode == DImode || mode == TImode) + && REG_P (SUBREG_REG (op)) + && (GET_MODE (SUBREG_REG (op)) == DFmode + || GET_MODE (SUBREG_REG (op)) == TFmode)) + return true; + + /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and + reg:TI. */ + if (GET_CODE (op) == SUBREG + && (mode == DFmode || mode == TFmode) + && REG_P (SUBREG_REG (op)) + && (GET_MODE (SUBREG_REG (op)) == DImode + || GET_MODE (SUBREG_REG (op)) == TImode)) + return true; + } - /* Reject (subreg:DF (reg:DI)). */ - if (GET_CODE (op) == SUBREG - && mode == DFmode + if (TARGET_SPE + && GET_CODE (op) == SUBREG + && mode == SImode && REG_P (SUBREG_REG (op)) - && GET_MODE (SUBREG_REG (op)) == DImode) + && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op)))) return true; return false; } -/* Darwin, AIX increases natural record alignment to doubleword if the first +/* AIX increases natural record alignment to doubleword if the first field is an FP double while the FP fields remain word aligned. */ unsigned int @@ -2618,6 +2795,37 @@ rs6000_special_round_type_align (tree type, unsigned int computed, return align; } +/* Darwin increases record alignment to the natural alignment of + the first field. */ + +unsigned int +darwin_rs6000_special_round_type_align (tree type, unsigned int computed, + unsigned int specified) +{ + unsigned int align = MAX (computed, specified); + + if (TYPE_PACKED (type)) + return align; + + /* Find the first field, looking down into aggregates. */ + do { + tree field = TYPE_FIELDS (type); + /* Skip all non field decls */ + while (field != NULL && TREE_CODE (field) != FIELD_DECL) + field = TREE_CHAIN (field); + if (! field) + break; + type = TREE_TYPE (field); + while (TREE_CODE (type) == ARRAY_TYPE) + type = TREE_TYPE (type); + } while (AGGREGATE_TYPE_P (type)); + + if (! AGGREGATE_TYPE_P (type) && type != error_mark_node) + align = MAX (align, TYPE_ALIGN (type)); + + return align; +} + /* Return 1 for an operand in small memory on V.4/eabi. */ int @@ -2806,6 +3014,10 @@ rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x, int strict) break; case TFmode: + if (TARGET_E500_DOUBLE) + return (SPE_CONST_OFFSET_OK (offset) + && SPE_CONST_OFFSET_OK (offset + 8)); + case TImode: if (mode == TFmode || !TARGET_POWERPC64) extra = 12; @@ -2884,7 +3096,8 @@ legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict) if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict)) return false; /* Restrict addressing for DI because of our SUBREG hackery. 
*/ - if (TARGET_E500_DOUBLE && (mode == DFmode || mode == DImode)) + if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode + || mode == DImode)) return false; x = XEXP (x, 1); @@ -2982,7 +3195,7 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, return reg; } else if (SPE_VECTOR_MODE (mode) - || (TARGET_E500_DOUBLE && (mode == DFmode + || (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode || mode == DImode))) { if (mode == DImode) @@ -3180,8 +3393,7 @@ rs6000_legitimize_tls_address (rtx addr, enum tls_model model) emit_move_insn (tmp2, mem); emit_insn (gen_addsi3 (tmp3, tmp1, tmp2)); last = emit_move_insn (got, tmp3); - REG_NOTES (last) = gen_rtx_EXPR_LIST (REG_EQUAL, gsym, - REG_NOTES (last)); + set_unique_reg_note (last, REG_EQUAL, gsym); REG_NOTES (first) = gen_rtx_INSN_LIST (REG_LIBCALL, last, REG_NOTES (first)); REG_NOTES (last) = gen_rtx_INSN_LIST (REG_RETVAL, first, @@ -3388,7 +3600,7 @@ rs6000_legitimize_reload_address (rtx x, enum machine_mode mode, && REG_MODE_OK_FOR_BASE_P (XEXP (x, 0), mode) && GET_CODE (XEXP (x, 1)) == CONST_INT && !SPE_VECTOR_MODE (mode) - && !(TARGET_E500_DOUBLE && (mode == DFmode + && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode || mode == DImode)) && !ALTIVEC_VECTOR_MODE (mode)) { @@ -3525,7 +3737,8 @@ rs6000_legitimate_address (enum machine_mode mode, rtx x, int reg_ok_strict) && !SPE_VECTOR_MODE (mode) && mode != TFmode /* Restrict addressing for DI because of our SUBREG hackery. */ - && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == DImode)) + && !(TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode + || mode == DImode)) && TARGET_UPDATE && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)) return 1; @@ -3583,9 +3796,7 @@ rs6000_mode_dependent_address (rtx addr) case LO_SUM: return true; - case PRE_INC: - case PRE_DEC: - return TARGET_UPDATE; + /* Auto-increment cases are now treated generically in recog.c. */ default: break; @@ -3639,9 +3850,6 @@ rs6000_hard_regno_nregs (int regno, enum machine_mode mode) if (FP_REGNO_P (regno)) return (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD; - if (TARGET_E500_DOUBLE && mode == DFmode) - return 1; - if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode)) return (GET_MODE_SIZE (mode) + UNITS_PER_SPE_WORD - 1) / UNITS_PER_SPE_WORD; @@ -3649,6 +3857,14 @@ rs6000_hard_regno_nregs (int regno, enum machine_mode mode) return (GET_MODE_SIZE (mode) + UNITS_PER_ALTIVEC_WORD - 1) / UNITS_PER_ALTIVEC_WORD; + /* The value returned for SCmode in the E500 double case is 2 for + ABI compatibility; storing an SCmode value in a single register + would require function_arg and rs6000_spe_function_arg to handle + SCmode so as to pass the value correctly in a pair of + registers. */ + if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode) + return (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD; + return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; } @@ -3750,11 +3966,11 @@ rs6000_emit_set_const (rtx dest, enum machine_mode mode, case SImode: result = no_new_pseudos ? 
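/* A minimal sketch (illustrative only, not GCC API) of the SImode
   split emitted just below in rs6000_emit_set_const: the first SET
   materializes the constant with its low 16 bits cleared, and the
   second IORs the low half back in, mirroring a lis/ori pair. */
static void
split_si_constant (unsigned c, unsigned *hi, unsigned *lo)
{
  *hi = c & ~0xffffu;  /* first SET: value with low half cleared (lis-like) */
  *lo = c &  0xffffu;  /* second SET: IOR of the low half (ori-like)        */
}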
dest : gen_reg_rtx (SImode); - emit_insn (gen_rtx_SET (VOIDmode, result, + emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (result), GEN_INT (INTVAL (source) & (~ (HOST_WIDE_INT) 0xffff)))); emit_insn (gen_rtx_SET (VOIDmode, dest, - gen_rtx_IOR (SImode, result, + gen_rtx_IOR (SImode, copy_rtx (result), GEN_INT (INTVAL (source) & 0xffff)))); result = dest; break; @@ -3809,7 +4025,7 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2) operand1 = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0, DImode); - operand2 = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0, + operand2 = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN != 0, DImode); emit_move_insn (operand1, GEN_INT (c1)); emit_move_insn (operand2, GEN_INT (c2)); @@ -3844,7 +4060,9 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2) else emit_move_insn (dest, GEN_INT (ud2 << 16)); if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); + emit_move_insn (copy_rtx (dest), + gen_rtx_IOR (DImode, copy_rtx (dest), + GEN_INT (ud1))); } else if ((ud4 == 0xffff && (ud3 & 0x8000)) || (ud4 == 0 && ! (ud3 & 0x8000))) @@ -3856,10 +4074,16 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2) emit_move_insn (dest, GEN_INT (ud3 << 16)); if (ud2 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud2))); - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (16))); + emit_move_insn (copy_rtx (dest), + gen_rtx_IOR (DImode, copy_rtx (dest), + GEN_INT (ud2))); + emit_move_insn (copy_rtx (dest), + gen_rtx_ASHIFT (DImode, copy_rtx (dest), + GEN_INT (16))); if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); + emit_move_insn (copy_rtx (dest), + gen_rtx_IOR (DImode, copy_rtx (dest), + GEN_INT (ud1))); } else { @@ -3870,14 +4094,20 @@ rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2) emit_move_insn (dest, GEN_INT (ud4 << 16)); if (ud3 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud3))); + emit_move_insn (copy_rtx (dest), + gen_rtx_IOR (DImode, copy_rtx (dest), + GEN_INT (ud3))); - emit_move_insn (dest, gen_rtx_ASHIFT (DImode, dest, GEN_INT (32))); + emit_move_insn (copy_rtx (dest), + gen_rtx_ASHIFT (DImode, copy_rtx (dest), + GEN_INT (32))); if (ud2 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, - GEN_INT (ud2 << 16))); + emit_move_insn (copy_rtx (dest), + gen_rtx_IOR (DImode, copy_rtx (dest), + GEN_INT (ud2 << 16))); if (ud1 != 0) - emit_move_insn (dest, gen_rtx_IOR (DImode, dest, GEN_INT (ud1))); + emit_move_insn (copy_rtx (dest), + gen_rtx_IOR (DImode, copy_rtx (dest), GEN_INT (ud1))); } } return dest; @@ -3948,8 +4178,8 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) { emit_move_insn (adjust_address (operands[0], SImode, 0), adjust_address (operands[1], SImode, 0)); - emit_move_insn (adjust_address (operands[0], SImode, 4), - adjust_address (operands[1], SImode, 4)); + emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4), + adjust_address (copy_rtx (operands[1]), SImode, 4)); return; } @@ -3976,7 +4206,8 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) if (FP_REGNO_P (regnum) || regnum >= FIRST_PSEUDO_REGISTER) { rtx newreg; - newreg = (no_new_pseudos ? operands[1] : gen_reg_rtx (mode)); + newreg = (no_new_pseudos ? 
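/* Sketch (illustrative helper, not GCC API) of the ud1..ud4
   composition rs6000_emit_set_long_const performs above in its
   general case: the 64-bit constant is rebuilt high-to-low from
   16-bit chunks with one 32-bit shift in the middle, mirroring the
   lis/ori/sldi/oris/ori sequence the emitted insns describe.
   Each udN is assumed already masked to 16 bits. */
static unsigned long long
compose_di_constant (unsigned ud4, unsigned ud3, unsigned ud2, unsigned ud1)
{
  unsigned long long v = (unsigned long long) ud4 << 16;  /* lis  */
  v |= ud3;                                               /* ori  */
  v <<= 32;                                               /* sldi */
  v |= (unsigned long long) ud2 << 16;                    /* oris */
  v |= ud1;                                               /* ori  */
  return v;
}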
copy_rtx (operands[1]) + : gen_reg_rtx (mode)); emit_insn (gen_aux_truncdfsf2 (newreg, operands[1])); operands[1] = newreg; } @@ -4017,20 +4248,20 @@ rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode) /* 128-bit constant floating-point values on Darwin should really be loaded as two parts. */ - if (!TARGET_IEEEQUAD - && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128 + if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE) { /* DImode is used, not DFmode, because simplify_gen_subreg doesn't know how to get a DFmode SUBREG of a TFmode. */ - rs6000_emit_move (simplify_gen_subreg (DImode, operands[0], mode, 0), - simplify_gen_subreg (DImode, operands[1], mode, 0), - DImode); - rs6000_emit_move (simplify_gen_subreg (DImode, operands[0], mode, - GET_MODE_SIZE (DImode)), - simplify_gen_subreg (DImode, operands[1], mode, - GET_MODE_SIZE (DImode)), - DImode); + enum machine_mode imode = (TARGET_E500_DOUBLE ? DFmode : DImode); + rs6000_emit_move (simplify_gen_subreg (imode, operands[0], mode, 0), + simplify_gen_subreg (imode, operands[1], mode, 0), + imode); + rs6000_emit_move (simplify_gen_subreg (imode, operands[0], mode, + GET_MODE_SIZE (imode)), + simplify_gen_subreg (imode, operands[1], mode, + GET_MODE_SIZE (imode)), + imode); return; } @@ -4820,7 +5051,7 @@ function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, static rtx spe_build_register_parallel (enum machine_mode mode, int gregno) { - rtx r1, r3; + rtx r1, r3, r5, r7; switch (mode) { @@ -4830,12 +5061,24 @@ spe_build_register_parallel (enum machine_mode mode, int gregno) return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1)); case DCmode: + case TFmode: r1 = gen_rtx_REG (DImode, gregno); r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); r3 = gen_rtx_REG (DImode, gregno + 2); r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8)); return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3)); + case TCmode: + r1 = gen_rtx_REG (DImode, gregno); + r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx); + r3 = gen_rtx_REG (DImode, gregno + 2); + r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8)); + r5 = gen_rtx_REG (DImode, gregno + 4); + r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16)); + r7 = gen_rtx_REG (DImode, gregno + 6); + r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24)); + return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7)); + default: gcc_unreachable (); } @@ -4850,7 +5093,8 @@ rs6000_spe_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but are passed and returned in a pair of GPRs for ABI compatibility. 
*/ - if (TARGET_E500_DOUBLE && (mode == DFmode || mode == DCmode)) + if (TARGET_E500_DOUBLE && (mode == DFmode || mode == DCmode + || mode == TFmode || mode == TCmode)) { int n_words = rs6000_arg_size (mode, type); @@ -5268,7 +5512,9 @@ function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, else if (TARGET_SPE_ABI && TARGET_SPE && (SPE_VECTOR_MODE (mode) || (TARGET_E500_DOUBLE && (mode == DFmode - || mode == DCmode)))) + || mode == DCmode + || mode == TFmode + || mode == TCmode)))) return rs6000_spe_function_arg (cum, mode, type); else if (abi == ABI_V4) @@ -5826,7 +6072,7 @@ rs6000_va_start (tree valist, rtx nextarg) if (cfun->va_list_gpr_size) { - t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr, + t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, build_int_cst (NULL_TREE, n_gpr)); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -5834,7 +6080,7 @@ rs6000_va_start (tree valist, rtx nextarg) if (cfun->va_list_fpr_size) { - t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, + t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, build_int_cst (NULL_TREE, n_fpr)); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -5845,7 +6091,7 @@ rs6000_va_start (tree valist, rtx nextarg) if (words != 0) t = build2 (PLUS_EXPR, TREE_TYPE (ovf), t, build_int_cst (NULL_TREE, words * UNITS_PER_WORD)); - t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); + t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); @@ -5862,7 +6108,7 @@ rs6000_va_start (tree valist, rtx nextarg) if (cfun->machine->varargs_save_offset) t = build2 (PLUS_EXPR, TREE_TYPE (sav), t, build_int_cst (NULL_TREE, cfun->machine->varargs_save_offset)); - t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t); + t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (sav), sav, t); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } @@ -5995,7 +6241,7 @@ rs6000_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) u = build2 (MULT_EXPR, integer_type_node, u, size_int (sav_scale)); t = build2 (PLUS_EXPR, ptr_type_node, t, u); - t = build2 (MODIFY_EXPR, void_type_node, addr, t); + t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t); gimplify_and_add (t, pre_p); t = build1 (GOTO_EXPR, void_type_node, lab_over); @@ -6008,7 +6254,7 @@ rs6000_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) { /* Ensure that we don't find any more args in regs. Alignment has taken care of the n_reg == 2 gpr case. */ - t = build2 (MODIFY_EXPR, TREE_TYPE (reg), reg, size_int (8)); + t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (reg), reg, size_int (8)); gimplify_and_add (t, pre_p); } } @@ -6025,11 +6271,11 @@ rs6000_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) } gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); - u = build2 (MODIFY_EXPR, void_type_node, addr, t); + u = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t); gimplify_and_add (u, pre_p); t = build2 (PLUS_EXPR, TREE_TYPE (t), t, size_int (size)); - t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t); + t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t); gimplify_and_add (t, pre_p); if (lab_over) @@ -6038,6 +6284,23 @@ rs6000_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p) append_to_statement_list (t, pre_p); } + if (STRICT_ALIGNMENT + && (TYPE_ALIGN (type) + > (unsigned) BITS_PER_UNIT * (align < 4 ? 
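/* Layout sketch (illustrative data only) of the new TCmode case in
   spe_build_register_parallel above: a 32-byte value is described as
   four even-numbered 64-bit GPR pairs covering successive 8-byte
   offsets, which is exactly what the four EXPR_LIST entries encode. */
struct tcmode_piece { int greg_delta; int byte_off; };
static const struct tcmode_piece tcmode_pieces[4] = {
  { 0,  0 },   /* r(gregno+0) holds bytes  0..7  */
  { 2,  8 },   /* r(gregno+2) holds bytes  8..15 */
  { 4, 16 },   /* r(gregno+4) holds bytes 16..23 */
  { 6, 24 },   /* r(gregno+6) holds bytes 24..31 */
};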
4 : align))) + { + /* The value (of type complex double, for example) may not be + aligned in memory in the saved registers, so copy via a + temporary. (This is the same code as used for SPARC.) */ + tree tmp = create_tmp_var (type, "va_arg_tmp"); + tree dest_addr = build_fold_addr_expr (tmp); + + tree copy = build_call_expr (implicit_built_in_decls[BUILT_IN_MEMCPY], + 3, dest_addr, addr, size_int (rsize * 4)); + + gimplify_and_add (copy, pre_p); + addr = dest_addr; + } + addr = fold_convert (ptrtype, addr); return build_va_arg_indirect_ref (addr); } @@ -6053,8 +6316,8 @@ def_builtin (int mask, const char *name, tree type, int code) abort (); rs6000_builtin_decls[code] = - lang_hooks.builtin_function (name, type, code, BUILT_IN_MD, - NULL, NULL_TREE); + add_builtin_function (name, type, code, BUILT_IN_MD, + NULL, NULL_TREE); } } @@ -6667,10 +6930,10 @@ static struct builtin_description bdesc_1arg[] = }; static rtx -rs6000_expand_unop_builtin (enum insn_code icode, tree arglist, rtx target) +rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat; - tree arg0 = TREE_VALUE (arglist); + tree arg0 = CALL_EXPR_ARG (exp, 0); rtx op0 = expand_normal (arg0); enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode0 = insn_data[icode].operand[1].mode; @@ -6716,10 +6979,10 @@ rs6000_expand_unop_builtin (enum insn_code icode, tree arglist, rtx target) } static rtx -altivec_expand_abs_builtin (enum insn_code icode, tree arglist, rtx target) +altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat, scratch1, scratch2; - tree arg0 = TREE_VALUE (arglist); + tree arg0 = CALL_EXPR_ARG (exp, 0); rtx op0 = expand_normal (arg0); enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode0 = insn_data[icode].operand[1].mode; @@ -6748,11 +7011,11 @@ altivec_expand_abs_builtin (enum insn_code icode, tree arglist, rtx target) } static rtx -rs6000_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target) +rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat; - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); enum machine_mode tmode = insn_data[icode].operand[0].mode; @@ -6822,12 +7085,12 @@ rs6000_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target) static rtx altivec_expand_predicate_builtin (enum insn_code icode, const char *opcode, - tree arglist, rtx target) + tree exp, rtx target) { rtx pat, scratch; - tree cr6_form = TREE_VALUE (arglist); - tree arg0 = TREE_VALUE (TREE_CHAIN (arglist)); - tree arg1 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + tree cr6_form = CALL_EXPR_ARG (exp, 0); + tree arg0 = CALL_EXPR_ARG (exp, 1); + tree arg1 = CALL_EXPR_ARG (exp, 2); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); enum machine_mode tmode = SImode; @@ -6898,11 +7161,11 @@ altivec_expand_predicate_builtin (enum insn_code icode, const char *opcode, } static rtx -altivec_expand_lv_builtin (enum insn_code icode, tree arglist, rtx target) +altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat, addr; - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode0 = Pmode; 
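/* The recurring change across these builtin-expander hunks: each
   expander now takes the CALL_EXPR itself ("exp") instead of its
   argument TREE_LIST, so arguments are fetched by index rather than
   by walking a chain, e.g.:

     before:  arg1 = TREE_VALUE (TREE_CHAIN (arglist));
     after:   arg1 = CALL_EXPR_ARG (exp, 1);

   This follows the CALL_EXPR representation change in which call
   arguments became direct operands of the CALL_EXPR, making indexed
   access cheap where chasing TREE_CHAIN links was linear. */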
enum machine_mode mode1 = Pmode; @@ -6944,11 +7207,11 @@ altivec_expand_lv_builtin (enum insn_code icode, tree arglist, rtx target) } static rtx -spe_expand_stv_builtin (enum insn_code icode, tree arglist) +spe_expand_stv_builtin (enum insn_code icode, tree exp) { - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); rtx op2 = expand_normal (arg2); @@ -6977,11 +7240,11 @@ spe_expand_stv_builtin (enum insn_code icode, tree arglist) } static rtx -altivec_expand_stv_builtin (enum insn_code icode, tree arglist) +altivec_expand_stv_builtin (enum insn_code icode, tree exp) { - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); rtx op2 = expand_normal (arg2); @@ -7018,12 +7281,12 @@ altivec_expand_stv_builtin (enum insn_code icode, tree arglist) } static rtx -rs6000_expand_ternop_builtin (enum insn_code icode, tree arglist, rtx target) +rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat; - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); rtx op2 = expand_normal (arg2); @@ -7081,8 +7344,7 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree arglist, rtx target) static rtx altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp) { - tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); - tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); unsigned int fcode = DECL_FUNCTION_CODE (fndecl); tree arg0; enum machine_mode tmode, mode0; @@ -7110,7 +7372,7 @@ altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp) *expandedp = true; - arg0 = TREE_VALUE (arglist); + arg0 = CALL_EXPR_ARG (exp, 0); op0 = expand_normal (arg0); tmode = insn_data[icode].operand[0].mode; mode0 = insn_data[icode].operand[1].mode; @@ -7135,8 +7397,7 @@ static rtx altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, bool *expandedp) { - tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); - tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); unsigned int fcode = DECL_FUNCTION_CODE (fndecl); tree arg0, arg1; enum machine_mode mode0, mode1; @@ -7162,8 +7423,8 @@ altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, return NULL_RTX; } - arg0 = TREE_VALUE (arglist); - arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); op0 = expand_normal (arg0); op1 = expand_normal (arg1); mode0 = insn_data[icode].operand[0].mode; @@ -7187,8 +7448,7 @@ static rtx altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, bool *expandedp) { - tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); - tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); unsigned int fcode = 
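/* Dispatch pattern used throughout this file: each builtin class
   keeps a descriptor table (bdesc_1arg, bdesc_2arg, bdesc_dst, ...)
   and expansion linearly scans it for the matching function code.
   A minimal model of that lookup (names illustrative, not GCC API): */
struct bdesc_model { unsigned code; int icode; };

static int
lookup_icode (const struct bdesc_model *tbl, int n, unsigned fcode)
{
  int i;
  for (i = 0; i < n; i++)
    if (tbl[i].code == fcode)
      return tbl[i].icode;
  return -1;  /* analogous to finding no CODE_FOR_... entry */
}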
DECL_FUNCTION_CODE (fndecl); tree arg0, arg1, arg2; enum machine_mode mode0, mode1, mode2; @@ -7203,9 +7463,9 @@ altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++) if (d->code == fcode) { - arg0 = TREE_VALUE (arglist); - arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); op0 = expand_normal (arg0); op1 = expand_normal (arg1); op2 = expand_normal (arg2); @@ -7245,7 +7505,7 @@ altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED, /* Expand vec_init builtin. */ static rtx -altivec_expand_vec_init_builtin (tree type, tree arglist, rtx target) +altivec_expand_vec_init_builtin (tree type, tree exp, rtx target) { enum machine_mode tmode = TYPE_MODE (type); enum machine_mode inner_mode = GET_MODE_INNER (tmode); @@ -7253,15 +7513,14 @@ altivec_expand_vec_init_builtin (tree type, tree arglist, rtx target) rtvec v = rtvec_alloc (n_elt); gcc_assert (VECTOR_MODE_P (tmode)); - - for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist)) + gcc_assert (n_elt == call_expr_nargs (exp)); + + for (i = 0; i < n_elt; ++i) { - rtx x = expand_normal (TREE_VALUE (arglist)); + rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); } - gcc_assert (arglist == NULL); - if (!target || !register_operand (target, tmode)) target = gen_reg_rtx (tmode); @@ -7289,16 +7548,16 @@ get_element_number (tree vec_type, tree arg) /* Expand vec_set builtin. */ static rtx -altivec_expand_vec_set_builtin (tree arglist) +altivec_expand_vec_set_builtin (tree exp) { enum machine_mode tmode, mode1; tree arg0, arg1, arg2; int elt; rtx op0, op1; - arg0 = TREE_VALUE (arglist); - arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); + arg2 = CALL_EXPR_ARG (exp, 2); tmode = TYPE_MODE (TREE_TYPE (arg0)); mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); @@ -7321,15 +7580,15 @@ altivec_expand_vec_set_builtin (tree arglist) /* Expand vec_ext builtin. 
*/ static rtx -altivec_expand_vec_ext_builtin (tree arglist, rtx target) +altivec_expand_vec_ext_builtin (tree exp, rtx target) { enum machine_mode tmode, mode0; tree arg0, arg1; int elt; rtx op0; - arg0 = TREE_VALUE (arglist); - arg1 = TREE_VALUE (TREE_CHAIN (arglist)); + arg0 = CALL_EXPR_ARG (exp, 0); + arg1 = CALL_EXPR_ARG (exp, 1); op0 = expand_normal (arg0); elt = get_element_number (TREE_TYPE (arg0), arg1); @@ -7357,8 +7616,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) struct builtin_description_predicates *dp; size_t i; enum insn_code icode; - tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); - tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); tree arg0; rtx op0, pat; enum machine_mode tmode, mode0; @@ -7389,15 +7647,15 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) switch (fcode) { case ALTIVEC_BUILTIN_STVX: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx, arglist); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx, exp); case ALTIVEC_BUILTIN_STVEBX: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, arglist); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp); case ALTIVEC_BUILTIN_STVEHX: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, arglist); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp); case ALTIVEC_BUILTIN_STVEWX: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, arglist); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp); case ALTIVEC_BUILTIN_STVXL: - return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, arglist); + return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl, exp); case ALTIVEC_BUILTIN_MFVSCR: icode = CODE_FOR_altivec_mfvscr; @@ -7416,7 +7674,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_MTVSCR: icode = CODE_FOR_altivec_mtvscr; - arg0 = TREE_VALUE (arglist); + arg0 = CALL_EXPR_ARG (exp, 0); op0 = expand_normal (arg0); mode0 = insn_data[icode].operand[0].mode; @@ -7438,7 +7696,7 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_DSS: icode = CODE_FOR_altivec_dss; - arg0 = TREE_VALUE (arglist); + arg0 = CALL_EXPR_ARG (exp, 0); STRIP_NOPS (arg0); op0 = expand_normal (arg0); mode0 = insn_data[icode].operand[0].mode; @@ -7464,19 +7722,19 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) case ALTIVEC_BUILTIN_VEC_INIT_V8HI: case ALTIVEC_BUILTIN_VEC_INIT_V16QI: case ALTIVEC_BUILTIN_VEC_INIT_V4SF: - return altivec_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target); + return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); case ALTIVEC_BUILTIN_VEC_SET_V4SI: case ALTIVEC_BUILTIN_VEC_SET_V8HI: case ALTIVEC_BUILTIN_VEC_SET_V16QI: case ALTIVEC_BUILTIN_VEC_SET_V4SF: - return altivec_expand_vec_set_builtin (arglist); + return altivec_expand_vec_set_builtin (exp); case ALTIVEC_BUILTIN_VEC_EXT_V4SI: case ALTIVEC_BUILTIN_VEC_EXT_V8HI: case ALTIVEC_BUILTIN_VEC_EXT_V16QI: case ALTIVEC_BUILTIN_VEC_EXT_V4SF: - return altivec_expand_vec_ext_builtin (arglist, target); + return altivec_expand_vec_ext_builtin (exp, target); default: break; @@ -7487,39 +7745,39 @@ altivec_expand_builtin (tree exp, rtx target, bool *expandedp) d = (struct builtin_description *) bdesc_abs; for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++) if (d->code == fcode) - return altivec_expand_abs_builtin (d->icode, arglist, target); + return altivec_expand_abs_builtin (d->icode, exp, target); /* 
Expand the AltiVec predicates. */ dp = (struct builtin_description_predicates *) bdesc_altivec_preds; for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, dp++) if (dp->code == fcode) return altivec_expand_predicate_builtin (dp->icode, dp->opcode, - arglist, target); + exp, target); /* LV* are funky. We initialized them differently. */ switch (fcode) { case ALTIVEC_BUILTIN_LVSL: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl, - arglist, target); + exp, target); case ALTIVEC_BUILTIN_LVSR: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr, - arglist, target); + exp, target); case ALTIVEC_BUILTIN_LVEBX: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx, - arglist, target); + exp, target); case ALTIVEC_BUILTIN_LVEHX: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx, - arglist, target); + exp, target); case ALTIVEC_BUILTIN_LVEWX: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx, - arglist, target); + exp, target); case ALTIVEC_BUILTIN_LVXL: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl, - arglist, target); + exp, target); case ALTIVEC_BUILTIN_LVX: return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx, - arglist, target); + exp, target); default: break; /* Fall through. */ @@ -7565,8 +7823,7 @@ static struct builtin_description bdesc_2arg_spe[] = static rtx spe_expand_builtin (tree exp, rtx target, bool *expandedp) { - tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); - tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); tree arg1, arg0; unsigned int fcode = DECL_FUNCTION_CODE (fndecl); enum insn_code icode; @@ -7587,7 +7844,7 @@ spe_expand_builtin (tree exp, rtx target, bool *expandedp) case SPE_BUILTIN_EVSTWHO: case SPE_BUILTIN_EVSTWWE: case SPE_BUILTIN_EVSTWWO: - arg1 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + arg1 = CALL_EXPR_ARG (exp, 2); if (TREE_CODE (arg1) != INTEGER_CST || TREE_INT_CST_LOW (arg1) & ~0x1f) { @@ -7604,10 +7861,10 @@ spe_expand_builtin (tree exp, rtx target, bool *expandedp) { case SPE_BUILTIN_EVSPLATFI: return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi, - arglist, target); + exp, target); case SPE_BUILTIN_EVSPLATI: return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati, - arglist, target); + exp, target); default: break; } @@ -7615,48 +7872,48 @@ spe_expand_builtin (tree exp, rtx target, bool *expandedp) d = (struct builtin_description *) bdesc_2arg_spe; for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d) if (d->code == fcode) - return rs6000_expand_binop_builtin (d->icode, arglist, target); + return rs6000_expand_binop_builtin (d->icode, exp, target); d = (struct builtin_description *) bdesc_spe_predicates; for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d) if (d->code == fcode) - return spe_expand_predicate_builtin (d->icode, arglist, target); + return spe_expand_predicate_builtin (d->icode, exp, target); d = (struct builtin_description *) bdesc_spe_evsel; for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d) if (d->code == fcode) - return spe_expand_evsel_builtin (d->icode, arglist, target); + return spe_expand_evsel_builtin (d->icode, exp, target); switch (fcode) { case SPE_BUILTIN_EVSTDDX: - return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp); case SPE_BUILTIN_EVSTDHX: - return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp); case SPE_BUILTIN_EVSTDWX: - return spe_expand_stv_builtin 
(CODE_FOR_spe_evstdwx, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp); case SPE_BUILTIN_EVSTWHEX: - return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp); case SPE_BUILTIN_EVSTWHOX: - return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp); case SPE_BUILTIN_EVSTWWEX: - return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp); case SPE_BUILTIN_EVSTWWOX: - return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp); case SPE_BUILTIN_EVSTDD: - return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp); case SPE_BUILTIN_EVSTDH: - return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp); case SPE_BUILTIN_EVSTDW: - return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp); case SPE_BUILTIN_EVSTWHE: - return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp); case SPE_BUILTIN_EVSTWHO: - return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp); case SPE_BUILTIN_EVSTWWE: - return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp); case SPE_BUILTIN_EVSTWWO: - return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, arglist); + return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp); case SPE_BUILTIN_MFSPEFSCR: icode = CODE_FOR_spe_mfspefscr; tmode = insn_data[icode].operand[0].mode; @@ -7673,7 +7930,7 @@ spe_expand_builtin (tree exp, rtx target, bool *expandedp) return target; case SPE_BUILTIN_MTSPEFSCR: icode = CODE_FOR_spe_mtspefscr; - arg0 = TREE_VALUE (arglist); + arg0 = CALL_EXPR_ARG (exp, 0); op0 = expand_normal (arg0); mode0 = insn_data[icode].operand[0].mode; @@ -7696,12 +7953,12 @@ spe_expand_builtin (tree exp, rtx target, bool *expandedp) } static rtx -spe_expand_predicate_builtin (enum insn_code icode, tree arglist, rtx target) +spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat, scratch, tmp; - tree form = TREE_VALUE (arglist); - tree arg0 = TREE_VALUE (TREE_CHAIN (arglist)); - tree arg1 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); + tree form = CALL_EXPR_ARG (exp, 0); + tree arg0 = CALL_EXPR_ARG (exp, 1); + tree arg1 = CALL_EXPR_ARG (exp, 2); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); enum machine_mode mode0 = insn_data[icode].operand[1].mode; @@ -7804,13 +8061,13 @@ spe_expand_predicate_builtin (enum insn_code icode, tree arglist, rtx target) */ static rtx -spe_expand_evsel_builtin (enum insn_code icode, tree arglist, rtx target) +spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target) { rtx pat, scratch; - tree arg0 = TREE_VALUE (arglist); - tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); - tree arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); - tree arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist)))); + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree arg1 = CALL_EXPR_ARG (exp, 1); + tree arg2 = CALL_EXPR_ARG (exp, 2); + tree arg3 = CALL_EXPR_ARG (exp, 3); rtx op0 = expand_normal (arg0); rtx op1 = expand_normal (arg1); rtx op2 = 
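/* Conceptual model (illustrative only; inferred from the operand
   ordering here) of the evsel expansion below: the first two
   operands are compared element-wise, and each 32-bit half of the
   result is then taken from the third or fourth operand.  The "eq"
   form is shown; other forms swap in the matching comparison. */
static void
evsel_model (const unsigned a[2], const unsigned b[2],
             const unsigned c[2], const unsigned d[2], unsigned out[2])
{
  int i;
  for (i = 0; i < 2; i++)
    out[i] = (a[i] == b[i]) ? c[i] : d[i];  /* per-half select */
}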
expand_normal (arg2); @@ -7864,8 +8121,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, enum machine_mode mode ATTRIBUTE_UNUSED, int ignore ATTRIBUTE_UNUSED) { - tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); - tree arglist = TREE_OPERAND (exp, 1); + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); unsigned int fcode = DECL_FUNCTION_CODE (fndecl); struct builtin_description *d; size_t i; @@ -7883,7 +8139,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, gcc_assert (TARGET_ALTIVEC); - arg = TREE_VALUE (arglist); + arg = CALL_EXPR_ARG (exp, 0); gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE); op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL); addr = memory_address (mode, op); @@ -7912,6 +8168,16 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, return target; } + /* FIXME: There's got to be a nicer way to handle this case than + constructing a new CALL_EXPR. */ + if (fcode == ALTIVEC_BUILTIN_VCFUX + || fcode == ALTIVEC_BUILTIN_VCFSX) + { + if (call_expr_nargs (exp) == 1) + exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp), + 2, CALL_EXPR_ARG (exp, 0), integer_zero_node); + } + if (TARGET_ALTIVEC) { ret = altivec_expand_builtin (exp, target, &success); @@ -7933,19 +8199,19 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, d = (struct builtin_description *) bdesc_1arg; for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++) if (d->code == fcode) - return rs6000_expand_unop_builtin (d->icode, arglist, target); + return rs6000_expand_unop_builtin (d->icode, exp, target); /* Handle simple binary operations. */ d = (struct builtin_description *) bdesc_2arg; for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++) if (d->code == fcode) - return rs6000_expand_binop_builtin (d->icode, arglist, target); + return rs6000_expand_binop_builtin (d->icode, exp, target); /* Handle simple ternary operations. */ d = (struct builtin_description *) bdesc_3arg; for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++) if (d->code == fcode) - return rs6000_expand_ternop_builtin (d->icode, arglist, target); + return rs6000_expand_ternop_builtin (d->icode, exp, target); gcc_unreachable (); } @@ -8578,12 +8844,11 @@ altivec_init_builtins (void) /* Initialize target builtin that implements targetm.vectorize.builtin_mask_for_load. */ - decl = lang_hooks.builtin_function ("__builtin_altivec_mask_for_load", - v16qi_ftype_long_pcvoid, - ALTIVEC_BUILTIN_MASK_FOR_LOAD, - BUILT_IN_MD, NULL, - tree_cons (get_identifier ("const"), - NULL_TREE, NULL_TREE)); + decl = add_builtin_function ("__builtin_altivec_mask_for_load", + v16qi_ftype_long_pcvoid, + ALTIVEC_BUILTIN_MASK_FOR_LOAD, + BUILT_IN_MD, NULL, NULL_TREE); + TREE_READONLY (decl) = 1; /* Record the decl. Will be used by rs6000_builtin_mask_for_load. 
*/ altivec_builtin_mask_for_load = decl; } @@ -9182,9 +9447,6 @@ rs6000_common_init_builtins (void) static void rs6000_init_libfuncs (void) { - if (!TARGET_HARD_FLOAT) - return; - if (DEFAULT_ABI != ABI_V4 && TARGET_XCOFF && !TARGET_POWER2 && !TARGET_POWERPC) { @@ -9203,6 +9465,27 @@ rs6000_init_libfuncs (void) set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub"); set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul"); set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv"); + + if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE))) + { + set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg"); + set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq"); + set_optab_libfunc (ne_optab, TFmode, "__gcc_qne"); + set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt"); + set_optab_libfunc (ge_optab, TFmode, "__gcc_qge"); + set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt"); + set_optab_libfunc (le_optab, TFmode, "__gcc_qle"); + set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord"); + + set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq"); + set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq"); + set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos"); + set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod"); + set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi"); + set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou"); + set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq"); + set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq"); + } } else { @@ -10796,7 +11079,10 @@ print_operand (FILE *file, rtx x, int code) tmp = XEXP (x, 0); /* Ugly hack because %y is overloaded. */ - if (TARGET_E500 && GET_MODE_SIZE (GET_MODE (x)) == 8) + if ((TARGET_SPE || TARGET_E500_DOUBLE) + && (GET_MODE_SIZE (GET_MODE (x)) == 8 + || GET_MODE (x) == TFmode + || GET_MODE (x) == TImode)) { /* Handle [reg]. */ if (GET_CODE (tmp) == REG) @@ -11095,7 +11381,7 @@ rs6000_generate_compare (enum rtx_code code) compare_result = gen_reg_rtx (comp_mode); /* E500 FP compare instructions on the GPRs. Yuck! */ - if ((TARGET_E500 && !TARGET_FPRS && TARGET_HARD_FLOAT) + if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && rs6000_compare_fp_p) { rtx cmp, or_result, compare_result2; @@ -11128,6 +11414,14 @@ rs6000_generate_compare (enum rtx_code code) rs6000_compare_op1); break; + case TFmode: + cmp = flag_unsafe_math_optimizations + ? gen_tsttfeq_gpr (compare_result, rs6000_compare_op0, + rs6000_compare_op1) + : gen_cmptfeq_gpr (compare_result, rs6000_compare_op0, + rs6000_compare_op1); + break; + default: gcc_unreachable (); } @@ -11152,6 +11446,14 @@ rs6000_generate_compare (enum rtx_code code) rs6000_compare_op1); break; + case TFmode: + cmp = flag_unsafe_math_optimizations + ? gen_tsttfgt_gpr (compare_result, rs6000_compare_op0, + rs6000_compare_op1) + : gen_cmptfgt_gpr (compare_result, rs6000_compare_op0, + rs6000_compare_op1); + break; + default: gcc_unreachable (); } @@ -11176,6 +11478,14 @@ rs6000_generate_compare (enum rtx_code code) rs6000_compare_op1); break; + case TFmode: + cmp = flag_unsafe_math_optimizations + ? gen_tsttflt_gpr (compare_result, rs6000_compare_op0, + rs6000_compare_op1) + : gen_cmptflt_gpr (compare_result, rs6000_compare_op0, + rs6000_compare_op1); + break; + default: gcc_unreachable (); } @@ -11219,6 +11529,14 @@ rs6000_generate_compare (enum rtx_code code) rs6000_compare_op1); break; + case TFmode: + cmp = flag_unsafe_math_optimizations + ? 
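/* In the E500 TFmode compare cases added to rs6000_generate_compare,
   each comparison picks one of two GPR compare patterns: the tst*
   form under flag_unsafe_math_optimizations (cheaper, no IEEE
   exception semantics) or the checking cmp* form otherwise.  Only
   eq/gt/lt primitives exist, so LE/GE are synthesized by OR-ing a
   second equality compare (compare_result2) into the ordered result,
   roughly (illustrative sketch, not GCC API): */
static int
le_from_primitives (int lt, int eq)
{
  return lt | eq;  /* a <= b  computed as  (a < b) | (a == b) */
}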
gen_tsttfeq_gpr (compare_result2, rs6000_compare_op0, + rs6000_compare_op1) + : gen_cmptfeq_gpr (compare_result2, rs6000_compare_op0, + rs6000_compare_op1); + break; + default: gcc_unreachable (); } @@ -11288,7 +11606,7 @@ rs6000_generate_compare (enum rtx_code code) under flag_finite_math_only we don't bother. */ if (rs6000_compare_fp_p && !flag_finite_math_only - && !(TARGET_HARD_FLOAT && TARGET_E500 && !TARGET_FPRS) + && !(TARGET_HARD_FLOAT && !TARGET_FPRS) && (code == LE || code == GE || code == UNEQ || code == LTGT || code == UNGT || code == UNLT)) @@ -11338,7 +11656,7 @@ rs6000_emit_sCOND (enum rtx_code code, rtx result) condition_rtx = rs6000_generate_compare (code); cond_code = GET_CODE (condition_rtx); - if (TARGET_E500 && rs6000_compare_fp_p + if (rs6000_compare_fp_p && !TARGET_FPRS && TARGET_HARD_FLOAT) { rtx t; @@ -11445,7 +11763,7 @@ output_cbranch (rtx op, const char *label, int reversed, rtx insn) code = reverse_condition (code); } - if ((TARGET_E500 && !TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode) + if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode) { /* The efscmp/tst* instructions twiddle bit 2, which maps nicely to the GT bit. */ @@ -11652,11 +11970,20 @@ rs6000_emit_vector_compare (enum rtx_code rcode, try_again = true; break; case NE: - /* Treat A != B as ~(A==B). */ + case UNLE: + case UNLT: + case UNGE: + case UNGT: + /* Invert condition and try again. + e.g., A != B becomes ~(A==B). */ { + enum rtx_code rev_code; enum insn_code nor_code; - rtx eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, - dest_mode); + rtx eq_rtx; + + rev_code = reverse_condition_maybe_unordered (rcode); + eq_rtx = rs6000_emit_vector_compare (rev_code, op0, op1, + dest_mode); nor_code = one_cmpl_optab->handlers[(int)dest_mode].insn_code; gcc_assert (nor_code != CODE_FOR_nothing); @@ -11861,7 +12188,7 @@ rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond) return rs6000_emit_int_cmove (dest, op, true_cond, false_cond); return 0; } - else if (TARGET_E500 && TARGET_HARD_FLOAT && !TARGET_FPRS + else if (TARGET_HARD_FLOAT && !TARGET_FPRS && SCALAR_FLOAT_MODE_P (compare_mode)) return 0; @@ -12540,6 +12867,8 @@ rs6000_split_multireg_move (rtx dst, rtx src) reg_mode = DFmode; else if (ALTIVEC_REGNO_P (reg)) reg_mode = V16QImode; + else if (TARGET_E500_DOUBLE && mode == TFmode) + reg_mode = DFmode; else reg_mode = word_mode; reg_mode_size = GET_MODE_SIZE (reg_mode); @@ -12716,6 +13045,13 @@ first_altivec_reg_to_save (void) if (! TARGET_ALTIVEC_ABI) return LAST_ALTIVEC_REGNO + 1; + /* On Darwin, the unwind routines are compiled without + TARGET_ALTIVEC, and use save_world to save/restore the + altivec registers when necessary. */ + if (DEFAULT_ABI == ABI_DARWIN && current_function_calls_eh_return + && ! TARGET_ALTIVEC) + return FIRST_ALTIVEC_REGNO + 20; + /* Find lowest numbered live register. */ for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i) if (regs_ever_live[i]) @@ -12733,6 +13069,13 @@ compute_vrsave_mask (void) { unsigned int i, mask = 0; + /* On Darwin, the unwind routines are compiled without + TARGET_ALTIVEC, and use save_world to save/restore the + call-saved altivec registers when necessary. */ + if (DEFAULT_ABI == ABI_DARWIN && current_function_calls_eh_return + && ! TARGET_ALTIVEC) + mask |= 0xFFF; + /* First, find out if we use _any_ altivec registers. 
*/ for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i) if (regs_ever_live[i]) @@ -13074,10 +13417,11 @@ rs6000_stack_info (void) info_ptr->vrsave_save_offset = info_ptr->gp_save_offset - info_ptr->vrsave_size; - /* Align stack so vector save area is on a quadword boundary. */ + /* Align stack so vector save area is on a quadword boundary. + The padding goes above the vectors. */ if (info_ptr->altivec_size != 0) info_ptr->altivec_padding_size - = 16 - (-info_ptr->vrsave_save_offset % 16); + = info_ptr->vrsave_save_offset & 0xF; else info_ptr->altivec_padding_size = 0; @@ -13085,6 +13429,8 @@ rs6000_stack_info (void) = info_ptr->vrsave_save_offset - info_ptr->altivec_padding_size - info_ptr->altivec_size; + gcc_assert (info_ptr->altivec_size == 0 + || info_ptr->altivec_save_offset % 16 == 0); /* Adjust for AltiVec case. */ info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size; @@ -13255,7 +13601,7 @@ spe_func_has_64bit_regs_p (void) if (SPE_VECTOR_MODE (mode)) return true; - if (TARGET_E500_DOUBLE && mode == DFmode) + if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode)) return true; } } @@ -14225,7 +14571,8 @@ rs6000_emit_prologue (void) || cfun->machine->ra_need_lr); /* For V.4, update stack before we do any saving and set back pointer. */ - if (info->push_p + if (! WORLD_SAVE_P (info) + && info->push_p && (DEFAULT_ABI == ABI_V4 || current_function_calls_eh_return)) { @@ -14250,11 +14597,13 @@ rs6000_emit_prologue (void) int i, j, sz; rtx treg; rtvec p; + rtx reg0; /* save_world expects lr in r0. */ + reg0 = gen_rtx_REG (Pmode, 0); if (info->lr_save_p) { - insn = emit_move_insn (gen_rtx_REG (Pmode, 0), + insn = emit_move_insn (reg0, gen_rtx_REG (Pmode, LINK_REGISTER_REGNUM)); RTX_FRAME_RELATED_P (insn) = 1; } @@ -14271,7 +14620,7 @@ rs6000_emit_prologue (void) && (!current_function_calls_eh_return || info->ehrd_offset == -432) && info->vrsave_save_offset == -224 - && info->altivec_save_offset == (-224 -16 -192)); + && info->altivec_save_offset == -416); treg = gen_rtx_REG (SImode, 11); emit_move_insn (treg, GEN_INT (-info->total_size)); @@ -14280,7 +14629,7 @@ rs6000_emit_prologue (void) in R11. It also clobbers R12, so beware! */ /* Preserve CR2 for save_world prologues */ - sz = 6; + sz = 5; sz += 32 - info->first_gp_reg_save; sz += 64 - info->first_fp_reg_save; sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1; @@ -14335,29 +14684,26 @@ rs6000_emit_prologue (void) RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg); } - /* Prevent any attempt to delete the setting of r0 and treg! */ - RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0)); - RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode, treg); - RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode, sp_reg_rtx); + /* Explain about use of R0. */ + if (info->lr_save_p) + { + rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, + GEN_INT (info->lr_save_offset + + sp_offset)); + rtx mem = gen_frame_mem (reg_mode, addr); + + RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, mem, reg0); + } + /* Explain what happens to the stack pointer. 
*/ + { + rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg); + RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval); + } insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); rs6000_frame_related (insn, frame_ptr_rtx, info->total_size, - NULL_RTX, NULL_RTX); - - if (current_function_calls_eh_return) - { - unsigned int i; - for (i = 0; ; ++i) - { - unsigned int regno = EH_RETURN_DATA_REGNO (i); - if (regno == INVALID_REGNUM) - break; - emit_frame_save (frame_reg_rtx, frame_ptr_rtx, reg_mode, regno, - info->ehrd_offset + sp_offset - + reg_size * (int) i, - info->total_size); - } - } + treg, GEN_INT (-info->total_size)); + sp_offset = info->total_size; } /* Save AltiVec registers if needed. */ @@ -14586,7 +14932,7 @@ rs6000_emit_prologue (void) /* ??? There's no need to emit actual instructions here, but it's the easiest way to get the frame unwind information emitted. */ - if (!WORLD_SAVE_P (info) && current_function_calls_eh_return) + if (current_function_calls_eh_return) { unsigned int i, regno; @@ -14810,7 +15156,7 @@ rs6000_emit_epilogue (int sibcall) rs6000_stack_t *info; int restoring_FPRs_inline; int using_load_multiple; - int using_mfcr_multiple; + int using_mtcr_multiple; int use_backchain_to_restore_sp; int sp_offset = 0; rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1); @@ -14839,7 +15185,7 @@ rs6000_emit_epilogue (int sibcall) use_backchain_to_restore_sp = (frame_pointer_needed || current_function_calls_alloca || info->total_size > 32767); - using_mfcr_multiple = (rs6000_cpu == PROCESSOR_PPC601 + using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603 || rs6000_cpu == PROCESSOR_PPC750 || optimize_size); @@ -15139,7 +15485,7 @@ rs6000_emit_epilogue (int sibcall) rtx r12_rtx = gen_rtx_REG (SImode, 12); int count = 0; - if (using_mfcr_multiple) + if (using_mtcr_multiple) { for (i = 0; i < 8; i++) if (regs_ever_live[CR0_REGNO+i] && ! call_used_regs[CR0_REGNO+i]) count++; gcc_assert (count); } - if (using_mfcr_multiple && count > 1) + if (using_mtcr_multiple && count > 1) { rtvec p; int ndx; @@ -16430,6 +16776,16 @@ output_function_profiler (FILE *file, int labelno) } + +/* The last insn issued by the scheduler. */ + +static rtx last_scheduled_insn; + +/* The following variable helps balance the issuing of load and + store instructions. */ + +static int load_store_pendulum; + /* Power4 load update and store update instructions are cracked into a load or store and an integer insn which are executed in the same cycle. Branches have their own dispatch slot which does not count against the @@ -16441,19 +16797,41 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED, rtx insn, int more) { + last_scheduled_insn = insn; if (GET_CODE (PATTERN (insn)) == USE || GET_CODE (PATTERN (insn)) == CLOBBER) + { + cached_can_issue_more = more; + return cached_can_issue_more; + } + + if (insn_terminates_group_p (insn, current_group)) + { + cached_can_issue_more = 0; + return cached_can_issue_more; + } + + /* The insn has no reservation but we reach here anyway; leave the + issue count unchanged. */ + if (recog_memoized (insn) < 0) return more; if (rs6000_sched_groups) { if (is_microcoded_insn (insn)) - return 0; + cached_can_issue_more = 0; else if (is_cracked_insn (insn)) - return more > 2 ? more - 2 : 0; + cached_can_issue_more = more > 2 ?
more - 2 : 0; + else + cached_can_issue_more = more - 1; + + return cached_can_issue_more; } - return more - 1; + if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn)) + return 0; + + cached_can_issue_more = more - 1; + return cached_can_issue_more; } /* Adjust the cost of a scheduling dependency. Return the new cost of @@ -16462,64 +16840,286 @@ rs6000_variable_issue (FILE *stream ATTRIBUTE_UNUSED, static int rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) { + enum attr_type attr_type; + if (! recog_memoized (insn)) return 0; - if (REG_NOTE_KIND (link) != 0) - return 0; + switch (REG_NOTE_KIND (link)) + { + case REG_DEP_TRUE: + { + /* Data dependency; DEP_INSN writes a register that INSN reads + some cycles later. */ + + /* Separate a load from a narrower, dependent store. */ + if (rs6000_sched_groups + && GET_CODE (PATTERN (insn)) == SET + && GET_CODE (PATTERN (dep_insn)) == SET + && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM + && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM + && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) + > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) + return cost + 14; + + attr_type = get_attr_type (insn); + + switch (attr_type) + { + case TYPE_JMPREG: + /* Tell the first scheduling pass about the latency between + a mtctr and bctr (and mtlr and br/blr). The first + scheduling pass will not know about this latency since + the mtctr instruction, which has the latency associated + to it, will be generated by reload. */ + return TARGET_POWER ? 5 : 4; + case TYPE_BRANCH: + /* Leave some extra cycles between a compare and its + dependent branch, to inhibit expensive mispredicts. */ + if ((rs6000_cpu_attr == CPU_PPC603 + || rs6000_cpu_attr == CPU_PPC604 + || rs6000_cpu_attr == CPU_PPC604E + || rs6000_cpu_attr == CPU_PPC620 + || rs6000_cpu_attr == CPU_PPC630 + || rs6000_cpu_attr == CPU_PPC750 + || rs6000_cpu_attr == CPU_PPC7400 + || rs6000_cpu_attr == CPU_PPC7450 + || rs6000_cpu_attr == CPU_POWER4 + || rs6000_cpu_attr == CPU_POWER5 + || rs6000_cpu_attr == CPU_CELL) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + + switch (get_attr_type (dep_insn)) + { + case TYPE_CMP: + case TYPE_COMPARE: + case TYPE_DELAYED_COMPARE: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + case TYPE_FPCOMPARE: + case TYPE_CR_LOGICAL: + case TYPE_DELAYED_CR: + return cost + 2; + default: + break; + } + break; + + case TYPE_STORE: + case TYPE_STORE_U: + case TYPE_STORE_UX: + case TYPE_FPSTORE: + case TYPE_FPSTORE_U: + case TYPE_FPSTORE_UX: + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + + if (GET_CODE (PATTERN (insn)) != SET) + /* If this happens, we have to extend this to schedule + optimally. Return default for now. */ + return cost; + + /* Adjust the cost for the case where the value written + by a fixed point operation is used as the address + gen value on a store. */ + switch (get_attr_type (dep_insn)) + { + case TYPE_LOAD: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_CNTLZ: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 4; + break; + } + case TYPE_LOAD_EXT: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + case TYPE_VAR_SHIFT_ROTATE: + case TYPE_VAR_DELAYED_COMPARE: + { + if (! 
store_data_bypass_p (dep_insn, insn)) + return 6; + break; + } + case TYPE_INTEGER: + case TYPE_COMPARE: + case TYPE_FAST_COMPARE: + case TYPE_EXTS: + case TYPE_SHIFT: + case TYPE_INSERT_WORD: + case TYPE_INSERT_DWORD: + case TYPE_FPLOAD_U: + case TYPE_FPLOAD_UX: + case TYPE_STORE_U: + case TYPE_STORE_UX: + case TYPE_FPSTORE_U: + case TYPE_FPSTORE_UX: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 3; + break; + } + case TYPE_IMUL: + case TYPE_IMUL2: + case TYPE_IMUL3: + case TYPE_LMUL: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 17; + break; + } + case TYPE_IDIV: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 45; + break; + } + case TYPE_LDIV: + { + if (! store_data_bypass_p (dep_insn, insn)) + return 57; + break; + } + default: + break; + } + } + break; + + case TYPE_LOAD: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_LOAD_EXT: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + + /* Adjust the cost for the case where the value written + by a fixed point instruction is used within the address + gen portion of a subsequent load(u)(x) */ + switch (get_attr_type (dep_insn)) + { + case TYPE_LOAD: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_CNTLZ: + { + if (set_to_load_agen (dep_insn, insn)) + return 4; + break; + } + case TYPE_LOAD_EXT: + case TYPE_LOAD_EXT_U: + case TYPE_LOAD_EXT_UX: + case TYPE_VAR_SHIFT_ROTATE: + case TYPE_VAR_DELAYED_COMPARE: + { + if (set_to_load_agen (dep_insn, insn)) + return 6; + break; + } + case TYPE_INTEGER: + case TYPE_COMPARE: + case TYPE_FAST_COMPARE: + case TYPE_EXTS: + case TYPE_SHIFT: + case TYPE_INSERT_WORD: + case TYPE_INSERT_DWORD: + case TYPE_FPLOAD_U: + case TYPE_FPLOAD_UX: + case TYPE_STORE_U: + case TYPE_STORE_UX: + case TYPE_FPSTORE_U: + case TYPE_FPSTORE_UX: + { + if (set_to_load_agen (dep_insn, insn)) + return 3; + break; + } + case TYPE_IMUL: + case TYPE_IMUL2: + case TYPE_IMUL3: + case TYPE_LMUL: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + { + if (set_to_load_agen (dep_insn, insn)) + return 17; + break; + } + case TYPE_IDIV: + { + if (set_to_load_agen (dep_insn, insn)) + return 45; + break; + } + case TYPE_LDIV: + { + if (set_to_load_agen (dep_insn, insn)) + return 57; + break; + } + default: + break; + } + } + break; + + case TYPE_FPLOAD: + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0) + && (get_attr_type (dep_insn) == TYPE_MFFGPR)) + return 2; + + default: + break; + } - if (REG_NOTE_KIND (link) == 0) - { - /* Data dependency; DEP_INSN writes a register that INSN reads - some cycles later. */ - - /* Separate a load from a narrower, dependent store. */ - if (rs6000_sched_groups - && GET_CODE (PATTERN (insn)) == SET - && GET_CODE (PATTERN (dep_insn)) == SET - && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM - && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM - && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1))) - > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0))))) - return cost + 14; - - switch (get_attr_type (insn)) - { - case TYPE_JMPREG: - /* Tell the first scheduling pass about the latency between - a mtctr and bctr (and mtlr and br/blr). The first - scheduling pass will not know about this latency since - the mtctr instruction, which has the latency associated - to it, will be generated by reload. */ - return TARGET_POWER ? 
5 : 4; - case TYPE_BRANCH: - /* Leave some extra cycles between a compare and its - dependent branch, to inhibit expensive mispredicts. */ - if ((rs6000_cpu_attr == CPU_PPC603 - || rs6000_cpu_attr == CPU_PPC604 - || rs6000_cpu_attr == CPU_PPC604E - || rs6000_cpu_attr == CPU_PPC620 - || rs6000_cpu_attr == CPU_PPC630 - || rs6000_cpu_attr == CPU_PPC750 - || rs6000_cpu_attr == CPU_PPC7400 - || rs6000_cpu_attr == CPU_PPC7450 - || rs6000_cpu_attr == CPU_POWER4 - || rs6000_cpu_attr == CPU_POWER5) - && recog_memoized (dep_insn) - && (INSN_CODE (dep_insn) >= 0) - && (get_attr_type (dep_insn) == TYPE_CMP - || get_attr_type (dep_insn) == TYPE_COMPARE - || get_attr_type (dep_insn) == TYPE_DELAYED_COMPARE - || get_attr_type (dep_insn) == TYPE_IMUL_COMPARE - || get_attr_type (dep_insn) == TYPE_LMUL_COMPARE - || get_attr_type (dep_insn) == TYPE_FPCOMPARE - || get_attr_type (dep_insn) == TYPE_CR_LOGICAL - || get_attr_type (dep_insn) == TYPE_DELAYED_CR)) - return cost + 2; - default: - break; - } /* Fall out to return default cost. */ + } + break; + + case REG_DEP_OUTPUT: + /* Output dependency; DEP_INSN writes a register that INSN writes some + cycles later. */ + if ((rs6000_cpu == PROCESSOR_POWER6) + && recog_memoized (dep_insn) + && (INSN_CODE (dep_insn) >= 0)) + { + attr_type = get_attr_type (insn); + + switch (attr_type) + { + case TYPE_FP: + if (get_attr_type (dep_insn) == TYPE_FP) + return 1; + break; + case TYPE_FPLOAD: + if (get_attr_type (dep_insn) == TYPE_MFFGPR) + return 2; + break; + default: + break; + } + } + case REG_DEP_ANTI: + /* Anti dependency; DEP_INSN reads a register that INSN writes some + cycles later. */ + return 0; + + default: + gcc_unreachable (); } return cost; @@ -16536,6 +17136,9 @@ is_microcoded_insn (rtx insn) || GET_CODE (PATTERN (insn)) == CLOBBER) return false; + if (rs6000_cpu_attr == CPU_CELL) + return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS; + if (rs6000_sched_groups) { enum attr_type type = get_attr_type (insn); @@ -16550,55 +17153,6 @@ is_microcoded_insn (rtx insn) return false; } -/* The function returns a nonzero value if INSN can be scheduled only - as the first insn in a dispatch group ("dispatch-slot restricted"). - In this case, the returned value indicates how many dispatch slots - the insn occupies (at the beginning of the group). - Return 0 otherwise. */ - -static int -is_dispatch_slot_restricted (rtx insn) -{ - enum attr_type type; - - if (!rs6000_sched_groups) - return 0; - - if (!insn - || insn == NULL_RTX - || GET_CODE (insn) == NOTE - || GET_CODE (PATTERN (insn)) == USE - || GET_CODE (PATTERN (insn)) == CLOBBER) - return 0; - - type = get_attr_type (insn); - - switch (type) - { - case TYPE_MFCR: - case TYPE_MFCRF: - case TYPE_MTCR: - case TYPE_DELAYED_CR: - case TYPE_CR_LOGICAL: - case TYPE_MTJMPR: - case TYPE_MFJMPR: - return 1; - case TYPE_IDIV: - case TYPE_LDIV: - return 2; - case TYPE_LOAD_L: - case TYPE_STORE_C: - case TYPE_ISYNC: - case TYPE_SYNC: - return 4; - default: - if (rs6000_cpu == PROCESSOR_POWER5 - && is_cracked_insn (insn)) - return 2; - return 0; - } -} - /* The function returns true if INSN is cracked into 2 instructions by the processor (and therefore occupies 2 issue slots). 
*/ @@ -16649,6 +17203,74 @@ is_branch_slot_insn (rtx insn) return false; } +/* Return true if OUT_INSN sets a value that is used in the address + generation computation of IN_INSN. */ +static bool +set_to_load_agen (rtx out_insn, rtx in_insn) +{ + rtx out_set, in_set; + + /* For performance reasons, only handle the simple case where + both insns are a single_set. */ + out_set = single_set (out_insn); + if (out_set) + { + in_set = single_set (in_insn); + if (in_set) + return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set)); + } + + return false; +} + +/* Return true if the target storage location of INSN1 is adjacent to + the target storage location of INSN2. */ + +static bool +adjacent_mem_locations (rtx insn1, rtx insn2) +{ + rtx a = get_store_dest (PATTERN (insn1)); + rtx b = get_store_dest (PATTERN (insn2)); + + if ((GET_CODE (XEXP (a, 0)) == REG + || (GET_CODE (XEXP (a, 0)) == PLUS + && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT)) + && (GET_CODE (XEXP (b, 0)) == REG + || (GET_CODE (XEXP (b, 0)) == PLUS + && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT))) + { + HOST_WIDE_INT val0 = 0, val1 = 0; + rtx reg0, reg1; + int val_diff; + + if (GET_CODE (XEXP (a, 0)) == PLUS) + { + reg0 = XEXP (XEXP (a, 0), 0); + val0 = INTVAL (XEXP (XEXP (a, 0), 1)); + } + else + reg0 = XEXP (a, 0); + + if (GET_CODE (XEXP (b, 0)) == PLUS) + { + reg1 = XEXP (XEXP (b, 0), 0); + val1 = INTVAL (XEXP (XEXP (b, 0), 1)); + } + else + reg1 = XEXP (b, 0); + + val_diff = val1 - val0; + + return ((REGNO (reg0) == REGNO (reg1)) + && (val_diff == INTVAL (MEM_SIZE (a)) + || val_diff == -INTVAL (MEM_SIZE (b)))); + } + + return false; +} + /* A C statement (sans semicolon) to update the integer scheduling priority INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier, reduce the priority to execute INSN later. Do not @@ -16688,7 +17310,7 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority) } #endif - if (is_dispatch_slot_restricted (insn) + if (insn_must_be_first_in_group (insn) && reload_completed && current_sched_info->sched_max_insns_priority && rs6000_sched_restricted_insns_priority) @@ -16708,9 +17330,49 @@ rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority) return (priority + 1); } + if (rs6000_cpu == PROCESSOR_POWER6 + && ((load_store_pendulum == -2 && is_load_insn (insn)) + || (load_store_pendulum == 2 && is_store_insn (insn)))) + /* Attach highest priority to insn if the scheduler has just issued two + stores and this instruction is a load, or two loads and this instruction + is a store. Power6 wants loads and stores scheduled alternately + when possible. */ + return current_sched_info->sched_max_insns_priority; + return priority; } +/* Return true if the instruction is nonpipelined on the Cell. */ +static bool +is_nonpipeline_insn (rtx insn) +{ + enum attr_type type; + if (!insn || !INSN_P (insn) + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + type = get_attr_type (insn); + if (type == TYPE_IMUL + || type == TYPE_IMUL2 + || type == TYPE_IMUL3 + || type == TYPE_LMUL + || type == TYPE_IDIV + || type == TYPE_LDIV + || type == TYPE_SDIV + || type == TYPE_DDIV + || type == TYPE_SSQRT + || type == TYPE_DSQRT + || type == TYPE_MFCR + || type == TYPE_MFCRF + || type == TYPE_MFJMPR) + return true; + + return false; +} + + /* Return how many instructions the machine can issue per cycle. */ static int @@ -16731,6 +17393,7 @@ rs6000_issue_rate (void) case CPU_PPC750: case CPU_PPC7400: case CPU_PPC8540: + case CPU_CELL: return 2; case CPU_RIOS2: case CPU_PPC604: @@ -16740,6 +17403,7 @@ return 4; case CPU_POWER4: case CPU_POWER5: + case CPU_POWER6: return 5; default: return 1; @@ -16754,9 +17418,29 @@ rs6000_use_sched_lookahead (void) { if (rs6000_cpu_attr == CPU_PPC8540) return 4; + if (rs6000_cpu_attr == CPU_CELL) + return (reload_completed ? 8 : 0); return 0; } +/* We are choosing an insn from the ready queue. Return nonzero if INSN can be chosen. */ +static int +rs6000_use_sched_lookahead_guard (rtx insn) +{ + if (rs6000_cpu_attr != CPU_CELL) + return 1; + + gcc_assert (insn != NULL_RTX && INSN_P (insn)); + + if (!reload_completed + || is_nonpipeline_insn (insn) + || is_microcoded_insn (insn)) + return 0; + + return 1; +} + /* Determine if PAT refers to memory. */ static bool @@ -16855,13 +17539,41 @@ is_store_insn (rtx insn) return is_store_insn1 (PATTERN (insn)); } +/* Return the MEM that is the destination of a store insn. */ + +static rtx +get_store_dest (rtx pat) +{ + gcc_assert (is_store_insn1 (pat)); + + if (GET_CODE (pat) == SET) + return SET_DEST (pat); + else if (GET_CODE (pat) == PARALLEL) + { + int i; + + for (i = 0; i < XVECLEN (pat, 0); i++) + { + rtx inner_pat = XVECEXP (pat, 0, i); + if (GET_CODE (inner_pat) == SET + && is_mem_ref (SET_DEST (inner_pat))) + return SET_DEST (inner_pat); + } + } + /* We shouldn't get here, because we should have either a simple + store insn or a store with update, both of which are covered above. */ + gcc_unreachable (); +} + /* Returns whether the dependence between INSN and NEXT is considered costly by the given target. */ static bool -rs6000_is_costly_dependence (rtx insn, rtx next, rtx link, int cost, - int distance) +rs6000_is_costly_dependence (dep_t dep, int cost, int distance) { + rtx insn; + rtx next; + /* If the flag is not enabled - no dependence is considered costly; allow all dependent insns in the same group. This is the most aggressive option. */ @@ -16874,6 +17586,9 @@ rs6000_is_costly_dependence (rtx insn, rtx next, rtx link, int cost, if (rs6000_sched_costly_dep == all_deps_costly) return true; + insn = DEP_PRO (dep); + next = DEP_CON (dep); + if (rs6000_sched_costly_dep == store_to_load_dep_costly && is_load_insn (next) && is_store_insn (insn)) @@ -16883,7 +17598,7 @@ if (rs6000_sched_costly_dep == true_store_to_load_dep_costly && is_load_insn (next) && is_store_insn (insn) - && (!link || (int) REG_NOTE_KIND (link) == 0)) + && DEP_KIND (dep) == REG_DEP_TRUE) /* Prevent load after store in the same group if it is a true dependence. */ return true; @@ -16924,6 +17639,237 @@ get_next_active_insn (rtx insn, rtx tail) return insn; } +/* We are about to begin issuing insns for this clock cycle. */ + +static int +rs6000_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, + int *pn_ready, int clock_var ATTRIBUTE_UNUSED) +{ + int n_ready = *pn_ready; + + if (sched_verbose) + fprintf (dump, "// rs6000_sched_reorder :\n"); + + /* Reorder the ready list if the next insn to be issued is a + nonpipeline insn. */ + if (rs6000_cpu_attr == CPU_CELL && n_ready > 1) + { + if (is_nonpipeline_insn (ready[n_ready - 1]) + && (recog_memoized (ready[n_ready - 2]) > 0)) + /* Simply swap first two insns. */
+ { + rtx tmp = ready[n_ready - 1]; + ready[n_ready - 1] = ready[n_ready - 2]; + ready[n_ready - 2] = tmp; + } + } + + if (rs6000_cpu == PROCESSOR_POWER6) + load_store_pendulum = 0; + + return rs6000_issue_rate (); +} + +/* Like rs6000_sched_reorder, but called after issuing each insn. */ + +static int +rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready, + int *pn_ready, int clock_var ATTRIBUTE_UNUSED) +{ + if (sched_verbose) + fprintf (dump, "// rs6000_sched_reorder2 :\n"); + + /* For Power6, we need to handle some special cases to try to keep the + store queue from overflowing and triggering expensive flushes. + + This code monitors how load and store instructions are being issued + and skews the ready list one way or the other to increase the likelihood + that a desired instruction is issued at the proper time. + + To do this, we maintain a "load_store_pendulum" that tracks the + current state of load/store issue: + + - If the pendulum is at zero, then no loads or stores have been + issued in the current cycle, so we do nothing. + + - If the pendulum is 1, then a single load has been issued in this + cycle and we attempt to locate another load in the ready list to + issue with it. + + - If the pendulum is -2, then two stores have already been + issued in this cycle, so we increase the priority of the first load + in the ready list to increase its likelihood of being chosen first + in the next cycle. + + - If the pendulum is -1, then a single store has been issued in this + cycle and we attempt to locate another store in the ready list to + issue with it, preferring a store to an adjacent memory location to + facilitate store pairing in the store queue. + + - If the pendulum is 2, then two loads have already been + issued in this cycle, so we increase the priority of the first store + in the ready list to increase its likelihood of being chosen first + in the next cycle. + + - If the pendulum is less than -2 or greater than 2, do nothing. + + Note: This code covers the most common scenarios. There exist + non-load/store instructions which make use of the LSU and which + would need to be accounted for to model the behavior of the machine + strictly. Those instructions are currently left unaccounted for to + help minimize the compile-time overhead of this code. */ + if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn) + { + int pos; + int i; + rtx tmp; + + if (is_store_insn (last_scheduled_insn)) + /* Issuing a store, swing the load_store_pendulum to the left. */ + load_store_pendulum--; + else if (is_load_insn (last_scheduled_insn)) + /* Issuing a load, swing the load_store_pendulum to the right. */ + load_store_pendulum++; + else + return cached_can_issue_more; + + /* If the pendulum is balanced, or there is only one instruction on + the ready list, then all is well, so return. */ + if ((load_store_pendulum == 0) || (*pn_ready <= 1)) + return cached_can_issue_more; + + if (load_store_pendulum == 1) + { + /* A load has been issued in this cycle. Scan the ready list + for another load to issue with it. */ + pos = *pn_ready - 1; + + while (pos >= 0) + { + if (is_load_insn (ready[pos])) + { + /* Found a load. Move it to the head of the ready list, + and adjust its priority so that it is more likely to + stay there. */ + tmp = ready[pos]; + for (i = pos; i < *pn_ready - 1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready - 1] = tmp; + if (INSN_PRIORITY_KNOWN (tmp)) + INSN_PRIORITY (tmp)++; + break; + } + pos--; + } + } + else if (load_store_pendulum == -2) + { + /* Two stores have been issued in this cycle. Increase the + priority of the first load in the ready list to favor it for + issuing in the next cycle. */ + pos = *pn_ready - 1; + + while (pos >= 0) + { + if (is_load_insn (ready[pos]) + && INSN_PRIORITY_KNOWN (ready[pos])) + { + INSN_PRIORITY (ready[pos])++; + + /* Adjust the pendulum to account for the fact that a load + was found and increased in priority. This is to prevent + increasing the priority of multiple loads. */ + load_store_pendulum--; + + break; + } + pos--; + } + } + else if (load_store_pendulum == -1) + { + /* A store has been issued in this cycle. Scan the ready list for + another store to issue with it, preferring a store to an adjacent + memory location. */ + int first_store_pos = -1; + + pos = *pn_ready - 1; + + while (pos >= 0) + { + if (is_store_insn (ready[pos])) + { + /* Maintain the index of the first store found on the + list. */ + if (first_store_pos == -1) + first_store_pos = pos; + + if (is_store_insn (last_scheduled_insn) + && adjacent_mem_locations (last_scheduled_insn, ready[pos])) + { + /* Found an adjacent store. Move it to the head of the + ready list, and adjust its priority so that it is + more likely to stay there. */ + tmp = ready[pos]; + for (i = pos; i < *pn_ready - 1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready - 1] = tmp; + if (INSN_PRIORITY_KNOWN (tmp)) + INSN_PRIORITY (tmp)++; + first_store_pos = -1; + + break; + } + } + pos--; + } + + if (first_store_pos >= 0) + { + /* An adjacent store wasn't found, but a non-adjacent store was, + so move the non-adjacent store to the front of the ready + list, and adjust its priority so that it is more likely to + stay there. */ + tmp = ready[first_store_pos]; + for (i = first_store_pos; i < *pn_ready - 1; i++) + ready[i] = ready[i + 1]; + ready[*pn_ready - 1] = tmp; + if (INSN_PRIORITY_KNOWN (tmp)) + INSN_PRIORITY (tmp)++; + } + } + else if (load_store_pendulum == 2) + { + /* Two loads have been issued in this cycle. Increase the priority + of the first store in the ready list to favor it for issuing in + the next cycle. */ + pos = *pn_ready - 1; + + while (pos >= 0) + { + if (is_store_insn (ready[pos]) + && INSN_PRIORITY_KNOWN (ready[pos])) + { + INSN_PRIORITY (ready[pos])++; + + /* Adjust the pendulum to account for the fact that a store + was found and increased in priority. This is to prevent + increasing the priority of multiple stores. */ + load_store_pendulum++; + + break; + } + pos--; + } + } + } + + return cached_can_issue_more; +} + /* Return whether the presence of INSN causes a dispatch group termination of group WHICH_GROUP. @@ -16940,28 +17886,179 @@ insn_terminates_group_p (rtx insn, enum group_termination which_group) { - enum attr_type type; + bool first, last; if (!
insn) return false; - type = get_attr_type (insn); + first = insn_must_be_first_in_group (insn); + last = insn_must_be_last_in_group (insn); - if (is_microcoded_insn (insn)) + if (first && last) return true; if (which_group == current_group) - { - if (is_branch_slot_insn (insn)) - return true; - return false; - } + return last; else if (which_group == previous_group) + return first; + + return false; +} + + +static bool +insn_must_be_first_in_group (rtx insn) +{ + enum attr_type type; + + if (!insn + || insn == NULL_RTX + || GET_CODE (insn) == NOTE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + switch (rs6000_cpu) { - if (is_dispatch_slot_restricted (insn)) - return true; - return false; + case PROCESSOR_POWER5: + if (is_cracked_insn (insn)) + return true; + case PROCESSOR_POWER4: + if (is_microcoded_insn (insn)) + return true; + + if (!rs6000_sched_groups) + return false; + + type = get_attr_type (insn); + + switch (type) + { + case TYPE_MFCR: + case TYPE_MFCRF: + case TYPE_MTCR: + case TYPE_DELAYED_CR: + case TYPE_CR_LOGICAL: + case TYPE_MTJMPR: + case TYPE_MFJMPR: + case TYPE_IDIV: + case TYPE_LDIV: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_ISYNC: + case TYPE_SYNC: + return true; + default: + break; + } + break; + case PROCESSOR_POWER6: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_INSERT_DWORD: + case TYPE_EXTS: + case TYPE_CNTLZ: + case TYPE_SHIFT: + case TYPE_VAR_SHIFT_ROTATE: + case TYPE_TRAP: + case TYPE_IMUL: + case TYPE_IMUL2: + case TYPE_IMUL3: + case TYPE_LMUL: + case TYPE_IDIV: + case TYPE_INSERT_WORD: + case TYPE_DELAYED_COMPARE: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + case TYPE_FPCOMPARE: + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + case TYPE_LOAD_U: + case TYPE_LOAD_UX: + case TYPE_LOAD_EXT_UX: + case TYPE_STORE_U: + case TYPE_STORE_UX: + case TYPE_FPLOAD_U: + case TYPE_FPLOAD_UX: + case TYPE_FPSTORE_U: + case TYPE_FPSTORE_UX: + return true; + default: + break; + } + break; + default: + break; + } + + return false; +} + +static bool +insn_must_be_last_in_group (rtx insn) +{ + enum attr_type type; + + if (!insn + || insn == NULL_RTX + || GET_CODE (insn) == NOTE + || GET_CODE (PATTERN (insn)) == USE + || GET_CODE (PATTERN (insn)) == CLOBBER) + return false; + + switch (rs6000_cpu) { + case PROCESSOR_POWER4: + case PROCESSOR_POWER5: + if (is_microcoded_insn (insn)) + return true; + + if (is_branch_slot_insn (insn)) + return true; + + break; + case PROCESSOR_POWER6: + type = get_attr_type (insn); + + switch (type) + { + case TYPE_EXTS: + case TYPE_CNTLZ: + case TYPE_SHIFT: + case TYPE_VAR_SHIFT_ROTATE: + case TYPE_TRAP: + case TYPE_IMUL: + case TYPE_IMUL2: + case TYPE_IMUL3: + case TYPE_LMUL: + case TYPE_IDIV: + case TYPE_DELAYED_COMPARE: + case TYPE_IMUL_COMPARE: + case TYPE_LMUL_COMPARE: + case TYPE_FPCOMPARE: + case TYPE_MFCR: + case TYPE_MTCR: + case TYPE_MFJMPR: + case TYPE_MTJMPR: + case TYPE_ISYNC: + case TYPE_SYNC: + case TYPE_LOAD_L: + case TYPE_STORE_C: + return true; + default: + break; } + break; + default: + break; + } return false; } @@ -16973,24 +18070,24 @@ static bool is_costly_group (rtx *group_insns, rtx next_insn) { int i; - rtx link; - int cost; int issue_rate = rs6000_issue_rate (); for (i = 0; i < issue_rate; i++) { + dep_link_t link; rtx insn = group_insns[i]; + if (!insn) continue; - for (link = INSN_DEPEND (insn); link != 0; link = XEXP (link, 1)) 
+ + FOR_EACH_DEP_LINK (link, INSN_FORW_DEPS (insn)) { - rtx next = XEXP (link, 0); - if (next == next_insn) - { - cost = insn_cost (insn, link, next_insn); - if (rs6000_is_costly_dependence (insn, next_insn, link, cost, 0)) - return true; - } + dep_t dep = DEP_LINK_DEP (link); + rtx next = DEP_CON (dep); + + if (next == next_insn + && rs6000_is_costly_dependence (dep, dep_cost (dep), 0)) + return true; } } @@ -17286,6 +18383,17 @@ pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail) return group_count; } +/* We're beginning a new block. Initialize data structures as necessary. */ + +static void +rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED, + int sched_verbose ATTRIBUTE_UNUSED, + int max_ready ATTRIBUTE_UNUSED) +{ + last_scheduled_insn = NULL_RTX; + load_store_pendulum = 0; +} + /* The following function is called at the end of scheduling BB. After reload, it inserts nops at insn group bundling. */ @@ -17993,6 +19101,14 @@ get_prev_label (tree function_name) return 0; } +#ifndef DARWIN_LINKER_GENERATES_ISLANDS +#define DARWIN_LINKER_GENERATES_ISLANDS 0 +#endif + +/* KEXTs still need branch islands. */ +#define DARWIN_GENERATE_ISLANDS (!DARWIN_LINKER_GENERATES_ISLANDS \ + || flag_mkernel || flag_apple_kext) + /* INSN is either a function call or a millicode call. It may have an unconditional jump in its delay slot. @@ -18003,7 +19119,8 @@ output_call (rtx insn, rtx *operands, int dest_operand_number, int cookie_operand_number) { static char buf[256]; - if (GET_CODE (operands[dest_operand_number]) == SYMBOL_REF + if (DARWIN_GENERATE_ISLANDS + && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF && (INTVAL (operands[cookie_operand_number]) & CALL_LONG)) { tree labelname; @@ -18759,11 +19876,12 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int *total) case MINUS: if (mode == DFmode) { - if (GET_CODE (XEXP (x, 0)) == MULT) + if (GET_CODE (XEXP (x, 0)) == MULT + || GET_CODE (XEXP (x, 1)) == MULT) { /* FNMA accounted in outer NEG. */ if (outer_code == NEG) - *total = 0; + *total = rs6000_cost->dmul - rs6000_cost->fp; else *total = rs6000_cost->dmul; } @@ -19147,6 +20265,102 @@ rs6000_emit_swdivdf (rtx res, rtx n, rtx d) gen_rtx_MULT (DFmode, v0, y3), u0))); } + +/* Emit popcount intrinsic on TARGET_POPCNTB targets. DST is the + target, and SRC is the argument operand. */ + +void +rs6000_emit_popcount (rtx dst, rtx src) +{ + enum machine_mode mode = GET_MODE (dst); + rtx tmp1, tmp2; + + tmp1 = gen_reg_rtx (mode); + + if (mode == SImode) + { + emit_insn (gen_popcntbsi2 (tmp1, src)); + tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101), + NULL_RTX, 0); + tmp2 = force_reg (SImode, tmp2); + emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24))); + } + else + { + emit_insn (gen_popcntbdi2 (tmp1, src)); + tmp2 = expand_mult (DImode, tmp1, + GEN_INT ((HOST_WIDE_INT) + 0x01010101 << 32 | 0x01010101), + NULL_RTX, 0); + tmp2 = force_reg (DImode, tmp2); + emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56))); + } +} + + +/* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the + target, and SRC is the argument operand. */ + +void +rs6000_emit_parity (rtx dst, rtx src) +{ + enum machine_mode mode = GET_MODE (dst); + rtx tmp; + + tmp = gen_reg_rtx (mode); + if (mode == SImode) + { + /* Is mult+shift >= shift+xor+shift+xor? 
*/ + if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3)) + { + rtx tmp1, tmp2, tmp3, tmp4; + + tmp1 = gen_reg_rtx (SImode); + emit_insn (gen_popcntbsi2 (tmp1, src)); + + tmp2 = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16))); + tmp3 = gen_reg_rtx (SImode); + emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2)); + + tmp4 = gen_reg_rtx (SImode); + emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8))); + emit_insn (gen_xorsi3 (tmp, tmp3, tmp4)); + } + else + rs6000_emit_popcount (tmp, src); + emit_insn (gen_andsi3 (dst, tmp, const1_rtx)); + } + else + { + /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */ + if (rs6000_cost->muldi >= COSTS_N_INSNS (5)) + { + rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; + + tmp1 = gen_reg_rtx (DImode); + emit_insn (gen_popcntbdi2 (tmp1, src)); + + tmp2 = gen_reg_rtx (DImode); + emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32))); + tmp3 = gen_reg_rtx (DImode); + emit_insn (gen_xordi3 (tmp3, tmp1, tmp2)); + + tmp4 = gen_reg_rtx (DImode); + emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16))); + tmp5 = gen_reg_rtx (DImode); + emit_insn (gen_xordi3 (tmp5, tmp3, tmp4)); + + tmp6 = gen_reg_rtx (DImode); + emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8))); + emit_insn (gen_xordi3 (tmp, tmp5, tmp6)); + } + else + rs6000_emit_popcount (tmp, src); + emit_insn (gen_anddi3 (dst, tmp, const1_rtx)); + } +} + /* Return an RTX representing where to find the function value of a function returning MODE. */ static rtx @@ -19267,7 +20481,8 @@ rs6000_function_value (tree valtype, tree func ATTRIBUTE_UNUSED) && ALTIVEC_VECTOR_MODE (mode)) regno = ALTIVEC_ARG_RETURN; else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT - && (mode == DFmode || mode == DCmode)) + && (mode == DFmode || mode == DCmode + || mode == TFmode || mode == TCmode)) return spe_build_register_parallel (mode, GP_ARG_RETURN); else regno = GP_ARG_RETURN; @@ -19307,7 +20522,8 @@ rs6000_libcall_value (enum machine_mode mode) else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg) return rs6000_complex_function_value (mode); else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT - && (mode == DFmode || mode == DCmode)) + && (mode == DFmode || mode == DCmode + || mode == TFmode || mode == TCmode)) return spe_build_register_parallel (mode, GP_ARG_RETURN); else regno = GP_ARG_RETURN;
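[Editor's addendum -- illustrative only, not part of the patch.]

The POWER6 load/store pendulum in rs6000_sched_reorder2 above is specified in prose and in scheduler-hook code that is hard to exercise in isolation. The following minimal, self-contained C sketch models only the pendulum state machine under the same rules (stores swing the pendulum left, loads swing it right, and the value after each issue decides how the ready list would be skewed). All toy_* names are invented for illustration; the real hook additionally mutates INSN_PRIORITY and reorders the ready array, which this sketch merely reports as text.

#include <stdio.h>

enum toy_kind { TOY_LOAD, TOY_STORE };

/* Map the post-update pendulum value to the ready-list adjustment
   described in the comment block of rs6000_sched_reorder2.  */
static const char *
toy_skew (int pendulum)
{
  switch (pendulum)
    {
    case 1:  return "scan ready list for a second load";
    case -1: return "scan ready list for a second, ideally adjacent, store";
    case 2:  return "raise priority of the first store for the next cycle";
    case -2: return "raise priority of the first load for the next cycle";
    default: return "no adjustment";
    }
}

int
main (void)
{
  /* Two toy cycles: one load-heavy, one store-heavy.  */
  enum toy_kind cycles[2][2] = { { TOY_LOAD, TOY_LOAD },
                                 { TOY_STORE, TOY_STORE } };
  int c, i;

  for (c = 0; c < 2; c++)
    {
      int pendulum = 0;  /* rs6000_sched_reorder resets it each cycle.  */
      for (i = 0; i < 2; i++)
        {
          pendulum += (cycles[c][i] == TOY_LOAD) ? 1 : -1;
          printf ("cycle %d, insn %d: pendulum=%2d -> %s\n",
                  c, i, pendulum, toy_skew (pendulum));
        }
    }
  return 0;
}

Similarly, rs6000_emit_popcount relies on an arithmetic identity the code leaves implicit: once popcntb has computed a population count within each byte, multiplying by 0x01010101 accumulates all four byte counts into the most significant byte (no carries can occur, since each per-byte count is at most 8), and a right shift by 24 extracts the total. A stand-alone demonstration, with toy_popcntb standing in for the instruction:

#include <stdio.h>
#include <stdint.h>

/* Software model of popcntb: population count within each byte.  */
static uint32_t
toy_popcntb (uint32_t x)
{
  uint32_t r = 0;
  int b;

  for (b = 0; b < 4; b++)
    {
      uint32_t byte = (x >> (8 * b)) & 0xff, c = 0;
      while (byte)
        {
          c += byte & 1;
          byte >>= 1;
        }
      r |= c << (8 * b);
    }
  return r;
}

int
main (void)
{
  uint32_t x = 0xf0f0a5a5;
  /* Sum the four byte counts into the top byte, then extract it.  */
  uint32_t popcount = (toy_popcntb (x) * 0x01010101u) >> 24;

  printf ("popcount(0x%08x) = %u\n", (unsigned) x, (unsigned) popcount);
  /* prints: popcount(0xf0f0a5a5) = 16 */
  return 0;
}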