* config/i386/i386.c (expand_vec_perm_pshufb2): Commit

[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-stmts.c
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c

index 6ce742c..7deaffc 100644 (file)
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -811,6 +811,46 @@ vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
  }
  
  
+/* Model cost for the type promotion or demotion STMT_INFO, whose operand def
+   types are DT[0] and DT[1].  PWR is zero for a single-step conversion, one
+   for a two-step one, and so on.  Step I costs vect_pow2 (I) stmts (I + 1 for
+   a promotion), so each additional step doubles the instructions required.  */
+
+static void
+vect_model_promotion_demotion_cost (stmt_vec_info stmt_info,
+                                   enum vect_def_type *dt, int pwr)
+{
+  int i, tmp;
+  int inside_cost = 0, outside_cost = 0, single_stmt_cost;
+
+  /* The SLP costs were already calculated during SLP tree build.  */
+  if (PURE_SLP_STMT (stmt_info))
+    return;
+
+  single_stmt_cost = vect_get_stmt_cost (vec_promote_demote);
+  for (i = 0; i < pwr + 1; i++)
+    {
+      tmp = (STMT_VINFO_TYPE (stmt_info) == type_promotion_vec_info_type) ?
+       (i + 1) : i;
+      inside_cost += vect_pow2 (tmp) * single_stmt_cost;
+    }
+
+  /* FORNOW: Assuming maximum 2 args per stmt.  */
+  for (i = 0; i < 2; i++)
+    {
+      if (dt[i] == vect_constant_def || dt[i] == vect_external_def)
+        outside_cost += vect_get_stmt_cost (vector_stmt);
+    }
+
+  if (vect_print_dump_info (REPORT_COST))
+    fprintf (vect_dump, "vect_model_promotion_demotion_cost: inside_cost = %d, "
+             "outside_cost = %d .", inside_cost, outside_cost);
+
+  /* Set the costs in STMT_INFO.  */
+  stmt_vinfo_set_inside_of_loop_cost (stmt_info, NULL, inside_cost);
+  stmt_vinfo_set_outside_of_loop_cost (stmt_info, NULL, outside_cost);
+}
+
  /* Function vect_cost_strided_group_size
  
     For strided load or store, return the group_size only if it is the first
@@ -882,12 +922,11 @@ vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
      {
        /* Uses a high and low interleave operation for each needed permute.  */
        inside_cost = ncopies * exact_log2(group_size) * group_size
-        * vect_get_stmt_cost (vector_stmt);
+        * vect_get_stmt_cost (vec_perm);
  
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
                   group_size);
-
      }
  
    /* Costs of the stores.  */
@@ -988,7 +1027,7 @@ vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, bool load_lanes_p,
      {
        /* Uses an even and odd extract operations for each needed permute.  */
        inside_cost = ncopies * exact_log2(group_size) * group_size
-       * vect_get_stmt_cost (vector_stmt);
+       * vect_get_stmt_cost (vec_perm);
  
        if (vect_print_dump_info (REPORT_COST))
          fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
@@ -1049,7 +1088,7 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
      case dr_explicit_realign:
        {
          *inside_cost += ncopies * (2 * vect_get_stmt_cost (vector_load)
-           + vect_get_stmt_cost (vector_stmt));
+                                  + vect_get_stmt_cost (vec_perm));
  
          /* FIXME: If the misalignment remains fixed across the iterations of
             the containing loop, the following cost should be added to the
@@ -1057,6 +1096,9 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
          if (targetm.vectorize.builtin_mask_for_load)
            *inside_cost += vect_get_stmt_cost (vector_stmt);
  
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump, "vect_model_load_cost: explicit realign");
+
          break;
        }
      case dr_explicit_realign_optimized:
@@ -1080,7 +1122,12 @@ vect_get_load_cost (struct data_reference *dr, int ncopies,
            }
  
          *inside_cost += ncopies * (vect_get_stmt_cost (vector_load)
-          + vect_get_stmt_cost (vector_stmt));
+                                  + vect_get_stmt_cost (vec_perm));
+
+        if (vect_print_dump_info (REPORT_COST))
+          fprintf (vect_dump,
+                  "vect_model_load_cost: explicit realign optimized");
+
          break;
        }
  
@@ -2392,16 +2439,19 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
        if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "=== vectorizable_conversion ===");
        if (code == FIX_TRUNC_EXPR || code == FLOAT_EXPR)
-       STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+        {
+         STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
+         vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+       }
        else if (modifier == NARROW)
         {
           STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
-         vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
+         vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
         }
        else
         {
           STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
-         vect_model_simple_cost (stmt_info, 2 * ncopies, dt, NULL);
+         vect_model_promotion_demotion_cost (stmt_info, dt, multi_step_cvt);
         }
        VEC_free (tree, heap, interm_types);
        return true;
@@ -3457,22 +3507,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
    /* Handle def.  */
    vec_dest = vect_create_destination_var (scalar_dest, vectype);
  
-  /* Allocate VECs for vector operands.  In case of SLP, vector operands are
-     created in the previous stages of the recursion, so no allocation is
-     needed, except for the case of shift with scalar shift argument.  In that
-     case we store the scalar operand in VEC_OPRNDS1 for every vector stmt to
-     be created to vectorize the SLP group, i.e., SLP_NODE->VEC_STMTS_SIZE.
-     In case of loop-based vectorization we allocate VECs of size 1.  We
-     allocate VEC_OPRNDS1 only in case of binary operation.  */
-  if (!slp_node)
-    {
-      vec_oprnds0 = VEC_alloc (tree, heap, 1);
-      if (op_type == binary_op || op_type == ternary_op)
-        vec_oprnds1 = VEC_alloc (tree, heap, 1);
-      if (op_type == ternary_op)
-        vec_oprnds2 = VEC_alloc (tree, heap, 1);
-    }
-
    /* In case the vectorization factor (VF) is bigger than the number
       of elements that we can fit in a vectype (nunits), we have to generate
       more than one vector stmt - i.e - we need to "unroll" the
@@ -3703,7 +3737,9 @@ vectorizable_store (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
    if (!STMT_VINFO_DATA_REF (stmt_info))
      return false;
  
-  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
+  if (tree_int_cst_compare (loop && nested_in_vect_loop_p (loop, stmt)
+                           ? STMT_VINFO_DR_STEP (stmt_info) : DR_STEP (dr),
+                           size_zero_node) < 0)
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "negative step for store.");
@@ -4216,7 +4252,10 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
    if (!STMT_VINFO_DATA_REF (stmt_info))
      return false;
  
-  negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
+  negative = tree_int_cst_compare (nested_in_vect_loop
+                                  ? STMT_VINFO_DR_STEP (stmt_info)
+                                  : DR_STEP (dr),
+                                  size_zero_node) < 0;
    if (negative && ncopies > 1)
      {
        if (vect_print_dump_info (REPORT_DETAILS))
@@ -4603,7 +4642,7 @@ vectorizable_load (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt,
       This can only occur when vectorizing memory accesses in the inner-loop
       nested within an outer-loop that is being vectorized.  */
  
-  if (loop && nested_in_vect_loop_p (loop, stmt)
+  if (nested_in_vect_loop
        && (TREE_INT_CST_LOW (DR_STEP (dr))
           % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
      {
@@ -5640,7 +5679,7 @@ new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo,
    else
      STMT_VINFO_DEF_TYPE (res) = vect_internal_def;
  
-  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
+  STMT_VINFO_SAME_ALIGN_REFS (res) = NULL;
    STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
    STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
    STMT_SLP_TYPE (res) = loop_vect;
@@ -5734,11 +5773,6 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
        && GET_MODE_CLASS (inner_mode) != MODE_FLOAT)
      return NULL_TREE;
  
-  /* We can't build a vector type of elements with alignment bigger than
-     their size.  */
-  if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
-    return NULL_TREE;
-
    /* For vector types of elements whose mode precision doesn't
       match their types precision we use a element type of mode
       precision.  The vectorization routines will have to make sure
@@ -5755,11 +5789,22 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, unsigned size)
       When the component mode passes the above test simply use a type
       corresponding to that mode.  The theory is that any use that
       would cause problems with this will disable vectorization anyway.  */
-  if (!SCALAR_FLOAT_TYPE_P (scalar_type)
-      && !INTEGRAL_TYPE_P (scalar_type)
-      && !POINTER_TYPE_P (scalar_type))
+  else if (!SCALAR_FLOAT_TYPE_P (scalar_type)
+          && !INTEGRAL_TYPE_P (scalar_type)
+          && !POINTER_TYPE_P (scalar_type))
      scalar_type = lang_hooks.types.type_for_mode (inner_mode, 1);
  
+  /* We can't build a vector type of elements with alignment bigger than
+     their size.  */
+  else if (nbytes < TYPE_ALIGN_UNIT (scalar_type))
+    scalar_type = lang_hooks.types.type_for_mode (inner_mode, 
+                                                 TYPE_UNSIGNED (scalar_type));
+
+  /* If we fell back to using the mode, fail if there was
+     no scalar type for it.  */
+  if (scalar_type == NULL_TREE)
+    return NULL_TREE;
+
    /* If no size was supplied use the mode the target prefers.   Otherwise
       lookup a vector mode of the specified size.  */
    if (size == 0)