OSDN Git Service

2011-05-02 Richard Guenther <rguenther@suse.de>
[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-loop.c
index 3aa77d6..e05c324 100644 (file)
@@ -745,6 +745,7 @@ new_loop_vec_info (struct loop *loop)
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
   LOOP_VINFO_VECT_FACTOR (res) = 0;
+  LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
   LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
   LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
   LOOP_VINFO_UNALIGNED_DR (res) = NULL;
@@ -793,7 +794,9 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
       free (LOOP_VINFO_BBS (loop_vinfo));
       free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
       free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+      VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
       VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
+      VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
 
       free (loop_vinfo);
       loop->aux = NULL;
@@ -839,6 +842,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
   free (LOOP_VINFO_BBS (loop_vinfo));
   free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
+  VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
   VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
   VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
   slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
@@ -1142,7 +1146,7 @@ vect_get_cost (enum vect_cost_for_stmt type_of_cost)
    Scan the loop stmts and make sure they are all vectorizable.  */
 
 static bool
-vect_analyze_loop_operations (loop_vec_info loop_vinfo)
+vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
 {
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
@@ -1163,6 +1167,40 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 
   gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
   vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+  if (slp)
+    {
+      /* If all the stmts in the loop can be SLPed, we perform only SLP, and
+        vectorization factor of the loop is the unrolling factor required by
+        the SLP instances.  If that unrolling factor is 1, we say, that we
+        perform pure SLP on loop - cross iteration parallelism is not
+        exploited.  */
+      for (i = 0; i < nbbs; i++)
+       {
+         basic_block bb = bbs[i];
+         for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
+           {
+             gimple stmt = gsi_stmt (si);
+             stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+             gcc_assert (stmt_info);
+             if ((STMT_VINFO_RELEVANT_P (stmt_info)
+                  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
+                 && !PURE_SLP_STMT (stmt_info))
+               /* STMT needs both SLP and loop-based vectorization.  */
+               only_slp_in_loop = false;
+           }
+       }
+
+      if (only_slp_in_loop)
+       vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
+      else
+       vectorization_factor = least_common_multiple (vectorization_factor,
+                               LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
+
+      LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+      if (vect_print_dump_info (REPORT_DETAILS))
+       fprintf (vect_dump, "Updating vectorization factor to %d ",
+                           vectorization_factor);
+    }
 
   for (i = 0; i < nbbs; i++)
     {
@@ -1180,11 +1218,11 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
               print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
             }
 
+          /* Inner-loop loop-closed exit phi in outer-loop vectorization
+             (i.e., a phi in the tail of the outer-loop).  */
           if (! is_loop_header_bb_p (bb))
             {
-              /* inner-loop loop-closed exit phi in outer-loop vectorization
-                 (i.e. a phi in the tail of the outer-loop).
-                 FORNOW: we currently don't support the case that these phis
+              /* FORNOW: we currently don't support the case that these phis
                  are not used in the outerloop (unless it is double reduction,
                  i.e., this phi is vect_reduction_def), cause this case
                  requires to actually do something here.  */
@@ -1198,6 +1236,32 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
                              "Unsupported loop-closed phi in outer-loop.");
                   return false;
                 }
+
+              /* If PHI is used in the outer loop, we check that its operand
+                 is defined in the inner loop.  */
+              if (STMT_VINFO_RELEVANT_P (stmt_info))
+                {
+                  tree phi_op;
+                  gimple op_def_stmt;
+
+                  if (gimple_phi_num_args (phi) != 1)
+                    return false;
+
+                  phi_op = PHI_ARG_DEF (phi, 0);
+                  if (TREE_CODE (phi_op) != SSA_NAME)
+                    return false;
+
+                  op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
+                  if (!op_def_stmt || !vinfo_for_stmt (op_def_stmt))
+                    return false;
+
+                  if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
+                        != vect_used_in_outer
+                      && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
+                           != vect_used_in_outer_by_reduction)
+                    return false;
+                }
+
               continue;
             }
 
@@ -1242,18 +1306,8 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
       for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
         {
           gimple stmt = gsi_stmt (si);
-          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-
-          gcc_assert (stmt_info);
-
          if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
            return false;
-
-          if ((STMT_VINFO_RELEVANT_P (stmt_info)
-               || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
-              && !PURE_SLP_STMT (stmt_info))
-            /* STMT needs both SLP and loop-based vectorization.  */
-            only_slp_in_loop = false;
         }
     } /* bbs */
 
@@ -1273,18 +1327,6 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
       return false;
     }
 
-  /* If all the stmts in the loop can be SLPed, we perform only SLP, and
-     vectorization factor of the loop is the unrolling factor required by the
-     SLP instances.  If that unrolling factor is 1, we say, that we perform
-     pure SLP on loop - cross iteration parallelism is not exploited.  */
-  if (only_slp_in_loop)
-    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
-  else
-    vectorization_factor = least_common_multiple (vectorization_factor,
-                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
-
-  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
-
   if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
       && vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump,
@@ -1380,7 +1422,7 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo)
 static bool
 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
 {
-  bool ok, dummy;
+  bool ok, dummy, slp = false;
   int max_vf = MAX_VECTORIZATION_FACTOR;
   int min_vf = 2;
 
@@ -1494,7 +1536,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   if (ok)
     {
       /* Decide which possible SLP instances to SLP.  */
-      vect_make_slp_decision (loop_vinfo);
+      slp = vect_make_slp_decision (loop_vinfo);
 
       /* Find stmts that need to be both vectorized and SLPed.  */
       vect_detect_hybrid_slp (loop_vinfo);
@@ -1503,7 +1545,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo)
   /* Scan all the operations in the loop and make sure they are
      vectorizable.  */
 
-  ok = vect_analyze_loop_operations (loop_vinfo);
+  ok = vect_analyze_loop_operations (loop_vinfo, slp);
   if (!ok)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -1645,9 +1687,10 @@ report_vect_op (gimple stmt, const char *msg)
    1. operation is commutative and associative and it is safe to
       change the order of the computation (if CHECK_REDUCTION is true)
    2. no uses for a2 in the loop (a2 is used out of the loop)
-   3. no uses of a1 in the loop besides the reduction operation.
+   3. no uses of a1 in the loop besides the reduction operation
+   4. no uses of a1 outside the loop.
 
-   Condition 1 is tested here.
+   Conditions 1,4 are tested here.
    Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
 
    (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
@@ -1698,8 +1741,16 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
       gimple use_stmt = USE_STMT (use_p);
       if (is_gimple_debug (use_stmt))
        continue;
-      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
-         && vinfo_for_stmt (use_stmt)
+
+      if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
+        {
+          if (vect_print_dump_info (REPORT_DETAILS))
+            fprintf (vect_dump, "intermediate value used outside loop.");
+
+          return NULL;
+        }
+
+      if (vinfo_for_stmt (use_stmt)
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
         nloop_uses++;
       if (nloop_uses > 1)
@@ -1970,6 +2021,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
       && (code == COND_EXPR
           || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
               && (is_gimple_assign (def1)
+                 || is_gimple_call (def1)
                  || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
                       == vect_induction_def
                  || (gimple_code (def1) == GIMPLE_PHI
@@ -1985,6 +2037,7 @@ vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
           && (code == COND_EXPR
                || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
                   && (is_gimple_assign (def2)
+                      || is_gimple_call (def2)
                       || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
                            == vect_induction_def
                       || (gimple_code (def2) == GIMPLE_PHI
@@ -2483,6 +2536,9 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
     case GIMPLE_BINARY_RHS:
       reduction_op = gimple_assign_rhs2 (stmt);
       break;
+    case GIMPLE_TERNARY_RHS:
+      reduction_op = gimple_assign_rhs3 (stmt);
+      break;
     default:
       gcc_unreachable ();
     }
@@ -2871,6 +2927,10 @@ get_initial_def_for_induction (gimple iv_phi)
       gimple_assign_set_lhs (new_stmt, induc_def);
       si = gsi_start_bb (bb);
       gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
+      set_vinfo_for_stmt (new_stmt,
+                         new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
+      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
+       = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
     }
 
   return induc_def;
@@ -3172,7 +3232,7 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
     {
     case GIMPLE_SINGLE_RHS:
       gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
-                                       == ternary_op);
+                 == ternary_op);
       reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
       break;
     case GIMPLE_UNARY_RHS:
@@ -3182,6 +3242,9 @@ vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
       reduction_op = reduc_index ?
                      gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
       break;
+    case GIMPLE_TERNARY_RHS:
+      reduction_op = gimple_op (stmt, reduc_index + 1);
+      break;
     default:
       gcc_unreachable ();
     }
@@ -4007,6 +4070,15 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
       ops[1] = gimple_assign_rhs2 (stmt);
       break;
 
+    case GIMPLE_TERNARY_RHS:
+      code = gimple_assign_rhs_code (stmt);
+      op_type = TREE_CODE_LENGTH (code);
+      gcc_assert (op_type == ternary_op);
+      ops[0] = gimple_assign_rhs1 (stmt);
+      ops[1] = gimple_assign_rhs2 (stmt);
+      ops[2] = gimple_assign_rhs3 (stmt);
+      break;
+
     case GIMPLE_UNARY_RHS:
       return false;
 
@@ -4076,7 +4148,7 @@ vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
   if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
     return false;
 
-  if (slp_node)
+  if (slp_node || PURE_SLP_STMT (stmt_info))
     ncopies = 1;
   else
     ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)