OSDN Git Service

2011-05-27 Alexander Monakov <amonakov@ispras.ru>
[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-slp.c
index 784db84..b060935 100644 (file)
@@ -215,7 +215,8 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
-           vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
+           vect_model_store_cost (stmt_info, ncopies_for_cost, false,
+                                  dt[0], slp_node);
        }
 
       else
@@ -243,14 +244,21 @@ vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
          else
            {
              /* Not first stmt of the group, check that the def-stmt/s match
-                the def-stmt/s of the first stmt.  */
+                the def-stmt/s of the first stmt.  Allow different definition
+                types for reduction chains: the first stmt must be a
+                vect_reduction_def (a phi node), and the rest
+                vect_internal_def.  */
              if ((i == 0
-                  && (*first_stmt_dt0 != dt[i]
+                  && ((*first_stmt_dt0 != dt[i]
+                        && !(*first_stmt_dt0 == vect_reduction_def
+                             && dt[i] == vect_internal_def))
                       || (*first_stmt_def0_type && def
                           && !types_compatible_p (*first_stmt_def0_type,
                                                   TREE_TYPE (def)))))
                  || (i == 1
-                     && (*first_stmt_dt1 != dt[i]
+                     && ((*first_stmt_dt1 != dt[i]
+                           && !(*first_stmt_dt1 == vect_reduction_def
+                                && dt[i] == vect_internal_def))
                          || (*first_stmt_def1_type && def
                              && !types_compatible_p (*first_stmt_def1_type,
                                                      TREE_TYPE (def)))))
@@ -508,10 +516,10 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
            {
              /* Load.  */
               /* FORNOW: Check that there is no gap between the loads.  */
-              if ((DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt
-                   && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
-                  || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
-                      && DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
+              if ((GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
+                   && GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
+                  || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt
+                      && GROUP_GAP (vinfo_for_stmt (stmt)) != 1))
                 {
                   if (vect_print_dump_info (REPORT_SLP))
                     {
@@ -525,7 +533,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
               /* Check that the size of interleaved loads group is not
                  greater than the SLP group size.  */
-              if (DR_GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
+              if (GROUP_SIZE (vinfo_for_stmt (stmt)) > ncopies * group_size)
                 {
                   if (vect_print_dump_info (REPORT_SLP))
                     {
@@ -538,7 +546,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
                   return false;
                 }
 
-              first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
               if (prev_first_load)
                 {
                   /* Check that there are no loads from different interleaving
@@ -579,7 +587,7 @@ vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 
                   /* Analyze costs (for the first stmt in the group).  */
                   vect_model_load_cost (vinfo_for_stmt (stmt),
-                                        ncopies_for_cost, *node);
+                                        ncopies_for_cost, false, *node);
                 }
 
               /* Store the place of this load in the interleaving chain.  In
@@ -783,7 +791,7 @@ vect_supported_slp_permutation_p (slp_instance instance)
 {
   slp_tree node = VEC_index (slp_tree, SLP_INSTANCE_LOADS (instance), 0);
   gimple stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (node), 0);
-  gimple first_load = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt));
+  gimple first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
   VEC (slp_tree, heap) *sorted_loads = NULL;
   int index;
   slp_tree *tmp_loads = NULL;
@@ -802,7 +810,7 @@ vect_supported_slp_permutation_p (slp_instance instance)
     {
       gimple scalar_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (load), 0);
       /* Check that the loads are all in the same interleaving chain.  */
-      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (scalar_stmt)) != first_load)
+      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (scalar_stmt)) != first_load)
         {
           if (vect_print_dump_info (REPORT_DETAILS))
             {
@@ -932,7 +940,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                 first = stmt;
               else
                 {
-                  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != first)
+                  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != first)
                     {
                       if (complex_numbers != 2)
                         return false;
@@ -947,7 +955,7 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
                       other_node_first = VEC_index (gimple, 
                                 SLP_TREE_SCALAR_STMTS (other_complex_node), 0);
 
-                      if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) 
+                      if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                           != other_node_first)
                        return false;
                     }
@@ -973,8 +981,10 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
      GROUP_SIZE.  */
   number_of_groups = VEC_length (int, load_permutation) / group_size;
 
-  /* Reduction (there are no data-refs in the root).  */
-  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
+  /* Reduction (there are no data-refs in the root).
+     In reduction chain the order of the loads is important.  */
+  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
+      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
       int first_group_load_index;
 
@@ -1002,7 +1012,36 @@ vect_supported_load_permutation_p (slp_instance slp_instn, int group_size,
 
       if (!bad_permutation)
         {
-          /* This permutaion is valid for reduction.  Since the order of the
+          /* Check that the loads in the first sequence are different and there
+             are no gaps between them.  */
+          load_index = sbitmap_alloc (group_size);
+          sbitmap_zero (load_index);
+          for (k = 0; k < group_size; k++)
+            {
+              first_group_load_index = VEC_index (int, load_permutation, k);
+              if (TEST_BIT (load_index, first_group_load_index))
+                {
+                  bad_permutation = true;
+                  break;
+                }
+
+              SET_BIT (load_index, first_group_load_index);
+            }
+
+          if (!bad_permutation)
+            for (k = 0; k < group_size; k++)
+              if (!TEST_BIT (load_index, k))
+                {
+                  bad_permutation = true;
+                  break;
+                }
+
+          sbitmap_free (load_index);
+        }
+
+      if (!bad_permutation)
+        {
+          /* This permutation is valid for reduction.  Since the order of the
              statements in the nodes is not important unless they are memory
              accesses, we can rearrange the statements in all the nodes 
              according to the order of the loads.  */
@@ -1112,7 +1151,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
 {
   slp_instance new_instance;
   slp_tree node = XNEW (struct _slp_tree);
-  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
   unsigned int unrolling_factor = 1, nunits;
   tree vectype, scalar_type = NULL_TREE;
   gimple next;
@@ -1123,11 +1162,20 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   VEC (slp_tree, heap) *loads;
   struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
 
-  if (dr)
+  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
-      scalar_type = TREE_TYPE (DR_REF (dr));
-      vectype = get_vectype_for_scalar_type (scalar_type);
-      group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
+      if (dr)
+        {
+          scalar_type = TREE_TYPE (DR_REF (dr));
+          vectype = get_vectype_for_scalar_type (scalar_type);
+        }
+      else
+        {
+          gcc_assert (loop_vinfo);
+          vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
+        }
+
+      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
     }
   else
     {
@@ -1168,13 +1216,13 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
   /* Create a node (a root of the SLP tree) for the packed strided stores.  */
   SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
   next = stmt;
-  if (dr)
+  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
     {
       /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
       while (next)
         {
           VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
-          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
         }
     }
   else
@@ -1183,14 +1231,7 @@ vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
       for (i = 0; VEC_iterate (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, 
                                next); 
            i++)
-        {
-          VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
-          if (vect_print_dump_info (REPORT_DETAILS))
-            {
-              fprintf (vect_dump, "pushing reduction into node: ");
-              print_gimple_stmt (vect_dump, next, 0, TDF_SLIM);
-            }
-        }
+        VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
     }
 
   SLP_TREE_VEC_STMTS (node) = NULL;
@@ -1283,8 +1324,8 @@ bool
 vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 {
   unsigned int i;
-  VEC (gimple, heap) *strided_stores, *reductions = NULL;
-  gimple store;
+  VEC (gimple, heap) *strided_stores, *reductions = NULL, *reduc_chains = NULL;
+  gimple first_element;
   bool ok = false;
 
   if (vect_print_dump_info (REPORT_SLP))
@@ -1293,14 +1334,15 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
   if (loop_vinfo)
     {
       strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
+      reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
       reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
     }
   else
     strided_stores = BB_VINFO_STRIDED_STORES (bb_vinfo);
 
   /* Find SLP sequences starting from groups of strided stores.  */
-  FOR_EACH_VEC_ELT (gimple, strided_stores, i, store)
-    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, store))
+  FOR_EACH_VEC_ELT (gimple, strided_stores, i, first_element)
+    if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
       ok = true;
 
   if (bb_vinfo && !ok)
@@ -1311,6 +1353,21 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
       return false;
     }
 
+  if (loop_vinfo
+      && VEC_length (gimple, LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)) > 0)
+    {
+      /* Find SLP sequences starting from reduction chains.  */
+      FOR_EACH_VEC_ELT (gimple, reduc_chains, i, first_element)
+        if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element))
+          ok = true;
+        else
+          return false;
+
+      /* Don't try to vectorize SLP reductions if reduction chain was
+         detected.  */
+      return ok;
+    }
+
   /* Find SLP sequences starting from groups of reductions.  */
   if (loop_vinfo && VEC_length (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo)) > 1
       && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, 
@@ -1322,9 +1379,10 @@ vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
 
 
 /* For each possible SLP instance decide whether to SLP it and calculate overall
-   unrolling factor needed to SLP the loop.  */
+   unrolling factor needed to SLP the loop.  Return TRUE if decided to SLP at
+   least one instance.  */
 
-void
+bool
 vect_make_slp_decision (loop_vec_info loop_vinfo)
 {
   unsigned int i, unrolling_factor = 1;
@@ -1353,6 +1411,8 @@ vect_make_slp_decision (loop_vec_info loop_vinfo)
   if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
     fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
+
+  return (decided_to_slp > 0);
 }
 
 
@@ -1457,6 +1517,8 @@ destroy_bb_vec_info (bb_vec_info bb_vinfo)
         free_stmt_vec_info (stmt);
     }
 
+  free_data_refs (BB_VINFO_DATAREFS (bb_vinfo));
+  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
   VEC_free (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo));
   VEC_free (slp_instance, heap, BB_VINFO_SLP_INSTANCES (bb_vinfo));
   free (bb_vinfo);
@@ -1811,13 +1873,14 @@ vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
 
 /* For constant and loop invariant defs of SLP_NODE this function returns
    (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
-   OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
-   stmts. NUMBER_OF_VECTORS is the number of vector defs to create.  
+   OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
+   scalar stmts.  NUMBER_OF_VECTORS is the number of vector defs to create.
    REDUC_INDEX is the index of the reduction operand in the statements, unless
    it is -1.  */
 
 static void
-vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
+vect_get_constant_vectors (tree op, slp_tree slp_node,
+                           VEC (tree, heap) **vec_oprnds,
                           unsigned int op_num, unsigned int number_of_vectors,
                            int reduc_index)
 {
@@ -1829,17 +1892,17 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
   tree t = NULL_TREE;
   int j, number_of_places_left_in_vector;
   tree vector_type;
-  tree op, vop;
+  tree vop;
   int group_size = VEC_length (gimple, stmts);
   unsigned int vec_num, i;
   int number_of_copies = 1;
   VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
   bool constant_p, is_store;
   tree neutral_op = NULL;
+  enum tree_code code = gimple_assign_rhs_code (stmt);
 
   if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
     {
-      enum tree_code code = gimple_assign_rhs_code (stmt);
       if (reduc_index == -1)
         {
           VEC_free (tree, heap, *vec_oprnds);
@@ -1847,7 +1910,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
         }
 
       op_num = reduc_index - 1;
-      op = gimple_op (stmt, op_num + 1);
+      op = gimple_op (stmt, reduc_index);
       /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
          we need either neutral operands or the original operands.  See
          get_initial_def_for_reduction() for details.  */
@@ -1889,25 +1952,16 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
       op = gimple_assign_rhs1 (stmt);
     }
   else
-    {
-      is_store = false;
-      op = gimple_op (stmt, op_num + 1);
-    }
+    is_store = false;
+
+  gcc_assert (op);
 
   if (CONSTANT_CLASS_P (op))
-    {
-      constant_p = true;
-      if (POINTER_TYPE_P (TREE_TYPE (gimple_assign_lhs (stmt))))
-        vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
-      else
-        vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
-    }
+    constant_p = true;
   else
-    {
-      constant_p = false;
-      vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
-    }
+    constant_p = false;
 
+  vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
   gcc_assert (vector_type);
   nunits = TYPE_VECTOR_SUBPARTS (vector_type);
 
@@ -1945,11 +1999,17 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
               gimple def_stmt = SSA_NAME_DEF_STMT (op);
 
               gcc_assert (loop);
-              /* Get the def before the loop.  */
-              op = PHI_ARG_DEF_FROM_EDGE (def_stmt, 
-                                          loop_preheader_edge (loop));
-              if (j != (number_of_copies - 1) && neutral_op)
+
+              /* Get the def before the loop.  In reduction chain we have only
+                 one initial value.  */
+              if ((j != (number_of_copies - 1)
+                   || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
+                       && i != 0))
+                  && neutral_op)
                 op = neutral_op;
+              else
+                op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
+                                            loop_preheader_edge (loop));
             }
 
           /* Create 'vect_ = {op0,op1,...,opn}'.  */
@@ -1994,12 +2054,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
       if (neutral_op)
         {
           if (!neutral_vec)
-            {
-              t = NULL;
-              for (i = 0; i < (unsigned) nunits; i++)
-                 t = tree_cons (NULL_TREE, neutral_op, t);
-              neutral_vec = build_vector (vector_type, t);
-            }
+           neutral_vec = build_vector_from_val (vector_type, neutral_op);
 
           VEC_quick_push (tree, *vec_oprnds, neutral_vec);
         }
@@ -2043,7 +2098,8 @@ vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
    the right node. This is used when the second operand must remain scalar.  */
 
 void
-vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
+vect_get_slp_defs (tree op0, tree op1, slp_tree slp_node,
+                   VEC (tree,heap) **vec_oprnds0,
                    VEC (tree,heap) **vec_oprnds1, int reduc_index)
 {
   gimple first_stmt;
@@ -2083,7 +2139,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
     vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
   else
     /* Build vectors from scalar defs.  */
-    vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects,
+    vect_get_constant_vectors (op0, slp_node, vec_oprnds0, 0, number_of_vects,
                                reduc_index);
 
   if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
@@ -2113,7 +2169,8 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
     vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
   else
     /* Build vectors from scalar defs.  */
-    vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects, -1);
+    vect_get_constant_vectors (op1, slp_node, vec_oprnds1, 1, number_of_vects,
+                               -1);
 }
 
 
@@ -2500,6 +2557,16 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance,
       si = gsi_for_stmt (last_store);
     }
 
+  /* Mark the first element of the reduction chain as reduction to properly
+     transform the node.  In the analysis phase only the last element of the
+     chain is marked as reduction.  */
+  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_STRIDED_ACCESS (stmt_info)
+      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
+    {
+      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
+      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+    }
+
   is_store = vect_transform_stmt (stmt, &si, &strided_store, node, instance);
   return is_store;
 }