X-Git-Url: http://git.sourceforge.jp/view?a=blobdiff_plain;f=gcc%2Ftree-vect-stmts.c;h=7deaffcb5da349f88035f203767cead20546b0b0;hb=7aec221bfaeacd43c0075b8de011f88862afdffd;hp=6ce742cdfa73f74f12792f46dc8cf888f29e53ae;hpb=bed8b93be659c8b8bb94001842533cdd5cbf460d;p=pf3gnuchains%2Fgcc-fork.git

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 6ce742cdfa7..7deaffcb5da 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -811,6 +811,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
 }
 
 
+/* Model cost for type demotion and promotion operations.  PWR is normally
+   zero for single-step promotions and demotions.  It will be one if
+   two-step promotion/demotion is required, and so on.  Each additional
+   step doubles the number of instructions required.  */
+
+static void
+vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
+                                    enum vect_def_type *dt, int pwr)
+{
+  int i, tmp;
+  int inside_cost = 0, outside_cost = 0, single_stmt_cost;
+
+  /* The SLP costs were already calculated during SLP tree build.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
+
+  single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
+  for (i = 0; i < pwr + 1; i++)
+    {
+      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
+            (i + 1) : i;
+      inside_cost += vect_pow2 (tmp) * single_stmt_cost;
+    }
+
+  /* FORNOW: Assuming maximum 2 args per stmt.  */
+  for (i = 0; i < 2; i++)
+    {
+      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
+        outside_cost += vect_get_stmt_cost (vector_stmt);
+    }
+
+  if (vect_print_dump_info (REPORT_COST))
+    fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
+             "outside_cost = %d .", inside_cost, outside_cost);
+
+  /* Set the costs in STMT_INFO.  */
+  stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
+  stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
+}
+
 /* Function vect_cost_strided_group_size
 
    For strided load or store, return the group_size only if it is the first
@@ -882,12 +922,11 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
     {
       /* Uses a high and low interleave operation for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
-        * vect_get_stmt_cost (vector_stmt);
+        * vect_get_stmt_cost (vec_perm);
 
       if (vect_print_dump_info (REPORT_COST))
         fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                  group_size);
-
     }
 
   /* Costs of the stores.  */
@@ -988,7 +1027,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
     {
       /* Uses even and odd extract operations for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
-        * vect_get_stmt_cost (vector_stmt);
+        * vect_get_stmt_cost (vec_perm);
 
       if (vect_print_dump_info (REPORT_COST))
         fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
@@ -1049,7 +1088,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
     case dr_explicit_realign:
       {
         *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
-                                   + vect_get_stmt_cost (vector_stmt));
+                                   + vect_get_stmt_cost (vec_perm));
 
         /* FIXME: If the misalignment remains fixed across the iterations of
            the containing loop, the following cost should be added to the
@@ -1057,6 +1096,9 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
         if (targetm.vectorize.builtin_mask_for_load)
           *inside_cost += vect_get_stmt_cost (vector_stmt);
 
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump, "vect_model_load_cost: explicit realign");
+
         break;
       }
     case dr_explicit_realign_optimized:
@@ -1080,7 +1122,12 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
           }
 
         *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
-                                   + vect_get_stmt_cost (vector_stmt));
+                                   + vect_get_stmt_cost (vec_perm));
+
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump,
+                   "vect_model_load_cost: explicit realign optimized");
+
         break;
       }
 
@@ -2392,16 +2439,19 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "=== vectorizable_conversion ===");
       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
-        STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+        {
+          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+          vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+        }
       else if (modifier == NARROW)
         {
           STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
-          vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
         }
       else
         {
          STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
-          vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
+          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
         }
       VEC_free (tree, heap, interm_types);
       return true;
@@ -3457,22 +3507,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   /* Handle def.  */
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
-  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
-     created in the previous stages of the recursion, so no allocation is
-     needed, except for the case of shift with scalar shift argument.  In that
-     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
-     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
-     In case of loop-based vectorization we allocate VECs of size 1.  We
-     allocate VEC_OPRNDS1 only in case of binary operation.  */
-  if (!slp_node)
-    {
-      vec_oprnds0 = VEC_alloc (tree, heap, 1);
-      if (op_type == binary_op || op_type == ternary_op)
-        vec_oprnds1 = VEC_alloc (tree, heap, 1);
-      if (op_type == ternary_op)
-        vec_oprnds2 = VEC_alloc (tree, heap, 1);
-    }
-
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
      more than one vector stmt - i.e - we need to "unroll" the
@@ -3703,7 +3737,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
+                            ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
+                            size_zero_node) < 0)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "negative step for store.");
@@ -4216,7 +4252,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
 
-  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  negative = tree_int_cst_compare (nested_in_vect_loop
+                                   ? STMT_VINFO_DR_STEP (stmt_info)
+                                   : DR_STEP (dr),
+                                   size_zero_node) < 0;
   if (negative && ncopies > 1)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -4603,7 +4642,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
          This can only occur when vectorizing memory accesses in the
          inner-loop nested within an outer-loop that is being vectorized.  */
 
-      if (loop && nested_in_vect_loop_p (loop, stmt)
+      if (nested_in_vect_loop
           && (TREE_INT_CST_LOW (DR_STEP (dr))
               % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
         {
@@ -5640,7 +5679,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
   else
     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
 
-  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
+  STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
   STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
   STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
   STMT_SLP_TYPE (res) = loop_vect;
@@ -5734,11 +5773,6 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
     return NULL_TREE;
 
-  /* We can't build a vector type of elements with alignment bigger than
-     their size.  */
-  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
-    return NULL_TREE;
-
   /* For vector types of elements whose mode precision doesn't
      match their type's precision we use an element type of mode
     precision.  The vectorization routines will have to make sure
@@ -5755,11 +5789,22 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
      When the component mode passes the above test simply use a type
      corresponding to that mode.  The theory is that any use that
      would cause problems with this will disable vectorization anyway.  */
-  if (!SCALAR_FLOAT_TYPE_P (scalar_type)
-      && !INTEGRAL_TYPE_P (scalar_type)
-      && !POINTER_TYPE_P (scalar_type))
+  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
+           && !INTEGRAL_TYPE_P (scalar_type)
+           && !POINTER_TYPE_P (scalar_type))
     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
 
+  /* We can't build a vector type of elements with alignment bigger than
+     their size.  */
+  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
+    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
+                                                  TYPE_UNSIGNED (scalar_type));
+
+  /* If we fell back to using the mode, fail if there was
+     no scalar type for it.  */
+  if (scalar_type == NULL_TREE)
+    return NULL_TREE;
+
   /* If no size was supplied use the mode the target prefers.  Otherwise
      lookup a vector mode of the specified size.  */
   if (size == 0)
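
The inside-cost loop of the new vect_model_promotion_demotion_cost encodes a simple doubling recurrence: a conversion needing PWR extra steps is charged the sum over i in [0, PWR] of 2^tmp vec_promote_demote instructions, where tmp is i for demotions and i + 1 for promotions (widening one vector produces two result vectors). The standalone sketch below replays that recurrence; it assumes vect_pow2 (n) in the patch computes 2**n (modeled here as 1 << n), and the helper name and driver are purely illustrative, not GCC code.

#include <stdio.h>

/* Sketch of the inside-cost recurrence from
   vect_model_promotion_demotion_cost; illustrative only.  */
static int
promote_demote_inside_cost (int pwr, int is_promotion, int single_stmt_cost)
{
  int i, inside_cost = 0;

  /* One conversion layer for i == 0; each additional step doubles the
     instruction count.  Promotions start at 2**1 because widening one
     vector yields two.  */
  for (i = 0; i < pwr + 1; i++)
    inside_cost += (1 << (is_promotion ? i + 1 : i)) * single_stmt_cost;

  return inside_cost;
}

int
main (void)
{
  /* Two-step promotion (pwr == 1) at unit cost: 2 + 4 = 6.  */
  printf ("promotion: %d\n", promote_demote_inside_cost (1, 1, 1));
  /* The matching two-step demotion: 1 + 2 = 3.  */
  printf ("demotion: %d\n", promote_demote_inside_cost (1, 0, 1));
  return 0;
}

This is why the vectorizable_conversion hunk stops charging promotions as a flat 2 * ncopies simple statements: multi-step conversions grow geometrically with multi_step_cvt, which the old vect_model_simple_cost call could not express.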
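The strided store and load hunks make a narrower change: the interleave permutes are now charged to the vec_perm cost bucket rather than the generic vector_stmt one, so targets that price shuffles separately are modeled more accurately. The formula itself is unchanged, inside_cost = ncopies * exact_log2 (group_size) * group_size * cost; for example, a strided group of size 4 with ncopies == 1 is charged exact_log2 (4) * 4 = 8 vec_perm operations. The dr_explicit_realign and dr_explicit_realign_optimized cases receive the same reclassification for their realignment permute.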
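The final two hunks change how get_vectype_for_scalar_type_and_size treats over-aligned element types: instead of refusing to build a vectype whenever TYPE_ALIGN_UNIT exceeds the element size, the function now falls back to the mode's natural scalar type and only fails when the language provides no type for that mode. A sketch of the kind of source affected follows; the typedef mirrors the over-aligned-int example from the GCC attribute documentation, and whether the loop ultimately vectorizes still depends on the target and the remaining checks.

/* An element type whose alignment (8) exceeds its size (4 on typical
   targets).  Before this patch no vectype could be built for it; after
   it, the vectype is built from plain int, the natural type of the
   element's mode.  Illustrative only.  */
typedef int more_aligned_int __attribute__ ((aligned (8)));

more_aligned_int a[256], b[256];

void
f (void)
{
  int i;
  for (i = 0; i < 256; i++)
    a[i] += b[i];
}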