}
+/* Model cost for type demotion and promotion operations.  PWR is normally
+   zero for single-step promotions and demotions.  It will be one if
+   two-step promotion/demotion is required, and so on.  Each additional
+   step doubles the number of instructions required.  */
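+/* For example, with PWR == 1 a two-step demotion costs
+   vect_pow2 (0) + vect_pow2 (1) = 3 vec_promote_demote stmts, while a
+   two-step promotion costs vect_pow2 (1) + vect_pow2 (2) = 6, since
+   each promotion step doubles the number of result vectors.  */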
+
+static void
+vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
+                                    enum vect_def_type *dt, int pwr)
+{
+  int i, tmp;
+  int inside_cost = 0, outside_cost = 0, single_stmt_cost;
+
+  /* The SLP costs were already calculated during SLP tree build.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
+
+  single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
+  for (i = 0; i < pwr + 1; i++)
+    {
+      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
+             (i + 1) : i;
+      inside_cost += vect_pow2 (tmp) * single_stmt_cost;
+    }
+
+  /* FORNOW: Assuming maximum 2 args per stmt.  */
+  for (i = 0; i < 2; i++)
+    {
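+      /* A constant or loop-invariant operand is built just once, outside
+         the loop, normally with a single vector stmt broadcasting the
+         scalar.  */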
+      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
+        outside_cost += vect_get_stmt_cost (vector_stmt);
+    }
+
+  if (vect_print_dump_info (REPORT_COST))
+    fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
+             "outside_cost = %d .", inside_cost, outside_cost);
+
+  /* Set the costs in STMT_INFO.  */
+  stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
+  stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
+}
+
 /* Function vect_cost_strided_group_size

    For strided load or store, return the group_size only if it is the first
     {
       /* Uses a high and low interleave operation for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
-                    * vect_get_stmt_cost (vector_stmt);
+                    * vect_get_stmt_cost (vec_perm);
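+      /* E.g., a strided group of 4 stores takes
+         exact_log2 (4) * 4 = 8 interleave (vec_perm) stmts per copy.  */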
       if (vect_print_dump_info (REPORT_COST))
         fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                  group_size);
-
     }
   /* Costs of the stores.  */
     {
       /* Uses even and odd extract operations for each needed permute.  */
       inside_cost = ncopies * exact_log2(group_size) * group_size
-                    * vect_get_stmt_cost (vector_stmt);
+                    * vect_get_stmt_cost (vec_perm);
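+      /* Likewise, a strided group of 4 loads takes 8 extract even/odd
+         (vec_perm) stmts per copy.  */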
       if (vect_print_dump_info (REPORT_COST))
         fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
     case dr_explicit_realign:
       {
         *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
-                                    + vect_get_stmt_cost (vector_stmt));
+                                    + vect_get_stmt_cost (vec_perm));
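+        /* Each vector requires the two aligned loads spanning the
+           unaligned access plus one vec_perm to combine them.  */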
         /* FIXME: If the misalignment remains fixed across the iterations of
            the containing loop, the following cost should be added to the
            outside costs.  */
         if (targetm.vectorize.builtin_mask_for_load)
           *inside_cost += vect_get_stmt_cost (vector_stmt);
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump, "vect_model_load_cost: explicit realign");
+
         break;
       }
case dr_explicit_realign_optimized:
}
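+        /* Only one load and one vec_perm per vector here: the second
+           aligned load is reused from the previous iteration of the
+           software-pipelined realignment scheme.  */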
         *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
-                                   + vect_get_stmt_cost (vector_stmt));
+                                   + vect_get_stmt_cost (vec_perm));
+
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump,
+                   "vect_model_load_cost: explicit realign optimized");
+
         break;
       }
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "=== vectorizable_conversion ===");
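+      /* multi_step_cvt is the number of intermediate conversion steps;
+         e.g., a char -> int promotion typically goes through short and
+         reaches here with multi_step_cvt == 1.  */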
       if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
-        STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+        {
+          STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+          vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+        }
       else if (modifier == NARROW)
         {
           STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
-          vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
         }
       else
         {
           STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
-          vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
+          vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
         }
       VEC_free (tree, heap, interm_types);
       return true;
   /* Handle def.  */
   vec_dest = vect_create_destination_var (scalar_dest, vectype);
-  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
-     created in the previous stages of the recursion, so no allocation is
-     needed, except for the case of shift with scalar shift argument.  In that
-     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
-     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
-     In case of loop-based vectorization we allocate VECs of size 1.  We
-     allocate VEC_OPRNDS1 only in case of binary operation.  */
-  if (!slp_node)
-    {
-      vec_oprnds0 = VEC_alloc (tree, heap, 1);
-      if (op_type == binary_op || op_type == ternary_op)
-        vec_oprnds1 = VEC_alloc (tree, heap, 1);
-      if (op_type == ternary_op)
-        vec_oprnds2 = VEC_alloc (tree, heap, 1);
-    }
-
   /* In case the vectorization factor (VF) is bigger than the number
      of elements that we can fit in a vectype (nunits), we have to generate
      more than one vector stmt - i.e - we need to "unroll" the
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
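+  /* In a nested loop, DR_STEP is the step with respect to the inner
+     loop; STMT_VINFO_DR_STEP is the step with respect to the loop
+     actually being vectorized.  */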
-  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
+                            ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
+                            size_zero_node) < 0)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "negative step for store.");
   if (!STMT_VINFO_DATA_REF (stmt_info))
     return false;
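+  /* As for stores, check the step relative to the loop being
+     vectorized, not the inner loop, when the access is nested.  */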
-  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  negative = tree_int_cst_compare (nested_in_vect_loop
+                                   ? STMT_VINFO_DR_STEP (stmt_info)
+                                   : DR_STEP (dr),
+                                   size_zero_node) < 0;
   if (negative && ncopies > 1)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
      This can only occur when vectorizing memory accesses in the inner-loop
      nested within an outer-loop that is being vectorized.  */
-  if (loop && nested_in_vect_loop_p (loop, stmt)
+  if (nested_in_vect_loop
       && (TREE_INT_CST_LOW (DR_STEP (dr))
           % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
     {
   else
     STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
-  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
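+  /* SAME_ALIGN_REFS can be allocated lazily on first use, since
+     VEC_safe_push copes with a NULL vector.  */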
+  STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
   STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
   STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
   STMT_SLP_TYPE (res) = loop_vect;
       && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
     return NULL_TREE;
-  /* We can't build a vector type of elements with alignment bigger than
-     their size.  */
-  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
-    return NULL_TREE;
-
   /* For vector types of elements whose mode precision doesn't
      match their type's precision we use an element type of mode
      precision.  The vectorization routines will have to make sure
      When the component mode passes the above test simply use a type
      corresponding to that mode.  The theory is that any use that
      would cause problems with this will disable vectorization anyway.  */
-  if (!SCALAR_FLOAT_TYPE_P (scalar_type)
-      && !INTEGRAL_TYPE_P (scalar_type)
-      && !POINTER_TYPE_P (scalar_type))
+  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
+           && !INTEGRAL_TYPE_P (scalar_type)
+           && !POINTER_TYPE_P (scalar_type))
     scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
+  /* We can't build a vector type of elements with alignment bigger than
+     their size.  */
+  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
+    scalar_type = lang_hooks.types.type_for_mode (inner_mode,
+                                                  TYPE_UNSIGNED (scalar_type));
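+  /* E.g., an int element declared with __attribute__ ((aligned (8)))
+     now falls back to a plain int of the same mode instead of making
+     the vector type unavailable.  */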
+
+  /* If we fell back to using the mode, fail if there was
+     no scalar type for it.  */
+  if (scalar_type == NULL_TREE)
+    return NULL_TREE;
+
   /* If no size was supplied use the mode the target prefers.  Otherwise
      look up a vector mode of the specified size.  */
   if (size == 0)