}
+/* Model cost for type demotion and promotion operations.  PWR is normally
+ zero for single-step promotions and demotions.  It will be one if
+ two-step promotion/demotion is required, and so on.  Each additional
+ step doubles the number of instructions required.  The computed
+ inside-of-loop and outside-of-loop costs are recorded in STMT_INFO;
+ DT gives the definition kind of each (at most two) operand. */
+
+static void
+vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
+ enum vect_def_type *dt, int pwr)
+{
+ int i, tmp;
+ int inside_cost = 0, outside_cost = 0, single_stmt_cost;
+
+ /* The SLP costs were already calculated during SLP tree build. */
+ if (PURE_SLP_STMT (stmt_info))
+ return;
+
+ /* Target cost of a single vec_promote_demote statement. */
+ single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
+ /* Sum the per-step costs: step I issues vect_pow2 (i + 1) statements
+ for a promotion but only vect_pow2 (i) for a demotion, since
+ widening produces more result vectors at each step. */
+ for (i = 0; i < pwr + 1; i++)
+ {
+ tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
+ (i + 1) : i;
+ inside_cost += vect_pow2 (tmp) * single_stmt_cost;
+ }
+
+ /* FORNOW: Assuming maximum 2 args per stmts. */
+ /* Constant or loop-invariant operands cost one statement each to
+ set up outside the loop. */
+ for (i = 0; i < 2; i++)
+ {
+ if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
+ outside_cost += vect_get_stmt_cost (vector_stmt);
+ }
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
+ "outside_cost = %d .", inside_cost, outside_cost);
+
+ /* Set the costs in STMT_INFO. */
+ stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
+ stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
+}
+
/* Function vect_cost_strided_group_size
For strided load or store, return the group_size only if it is the first
if (vect_print_dump_info (REPORT_COST))
fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
group_size);
-
}
/* Costs of the stores. */
case dr_explicit_realign:
{
*inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
- + vect_get_stmt_cost (vector_stmt));
+ + vect_get_stmt_cost (vec_perm));
/* FIXME: If the misalignment remains fixed across the iterations of
the containing loop, the following cost should be added to the
if (targetm.vectorize.builtin_mask_for_load)
*inside_cost += vect_get_stmt_cost (vector_stmt);
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump, "vect_model_load_cost: explicit realign");
+
break;
}
case dr_explicit_realign_optimized:
}
*inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
- + vect_get_stmt_cost (vector_stmt));
+ + vect_get_stmt_cost (vec_perm));
+
+ if (vect_print_dump_info (REPORT_COST))
+ fprintf (vect_dump,
+ "vect_model_load_cost: explicit realign optimized");
+
break;
}
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vectorizable_conversion ===");
if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
- STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+ {
+ STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+ vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+ }
else if (modifier == NARROW)
{
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
- vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+ vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
}
else
{
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
- vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
+ vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
}
VEC_free (tree, heap, interm_types);
return true;
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
- if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+ if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
+ ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
+ size_zero_node) < 0)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "negative step for store.");
if (!STMT_VINFO_DATA_REF (stmt_info))
return false;
- negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+ negative = tree_int_cst_compare (nested_in_vect_loop
+ ? STMT_VINFO_DR_STEP (stmt_info)
+ : DR_STEP (dr),
+ size_zero_node) < 0;
if (negative && ncopies > 1)
{
if (vect_print_dump_info (REPORT_DETAILS))
This can only occur when vectorizing memory accesses in the inner-loop
nested within an outer-loop that is being vectorized. */
- if (loop && nested_in_vect_loop_p (loop, stmt)
+ if (nested_in_vect_loop
&& (TREE_INT_CST_LOW (DR_STEP (dr))
% GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
{