pf3gnuchains/gcc-fork.git: gcc/tree-vectorizer.c

diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index c239d29..ac3f843 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -147,24 +147,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 
 /*************************************************************************
-  Simple Loop Peeling Utilities
- *************************************************************************/
-static void slpeel_update_phis_for_duplicate_loop 
-  (struct loop *, struct loop *, bool after);
-static void slpeel_update_phi_nodes_for_guard1 
-  (edge, struct loop *, bool, basic_block *, bitmap *); 
-static void slpeel_update_phi_nodes_for_guard2 
-  (edge, struct loop *, bool, basic_block *);
-static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
-
-static void rename_use_op (use_operand_p);
-static void rename_variables_in_bb (basic_block);
-static void rename_variables_in_loop (struct loop *);
-
-/*************************************************************************
   General Vectorization Utilities
  *************************************************************************/
-static void vect_set_dump_settings (void);
 
 /* vect_dump will be set to stderr or to dump_file if it exists.  */
 FILE *vect_dump;
@@ -241,7 +225,7 @@ rename_variables_in_bb (basic_block bb)
 
 /* Renames variables in new generated LOOP.  */
 
-static void
+void
 rename_variables_in_loop (struct loop *loop)
 {
   unsigned i;
@@ -806,7 +790,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 /* Given LOOP this function generates a new copy of it and puts it 
    on E which is either the entry or exit of LOOP.  */
 
-static struct loop *
+struct loop *
 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 {
   struct loop *new_loop;
@@ -871,6 +855,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
   if (at_exit) /* Add the loop copy at exit.  */
     {
       redirect_edge_and_branch_force (e, new_loop->header);
+      PENDING_STMT (e) = NULL;
       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
       if (was_imm_dom)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
@@ -888,6 +873,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
        new_exit_e = EDGE_SUCC (new_loop->header, 1); 
 
       redirect_edge_and_branch_force (new_exit_e, loop->header);
+      PENDING_STMT (new_exit_e) = NULL;
       set_immediate_dominator (CDI_DOMINATORS, loop->header,
                               new_exit_e->src);
 
@@ -901,6 +887,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
        }    
 
       redirect_edge_and_branch_force (entry_e, new_loop->header);
+      PENDING_STMT (entry_e) = NULL;
       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
     }
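
The three PENDING_STMT hunks above all follow the same pattern: after an edge
is forced to a new destination, any PHI arguments the SSA edge-redirection
machinery queued on that edge are dropped, since the duplicated loop builds
its own PHI nodes afterwards.  A minimal sketch of the pattern, assuming GCC
4.3-era CFG/SSA internals (the helper name is illustrative only):

    /* Redirect E to DEST and discard the PHI arguments queued on the edge,
       instead of letting flush_pending_stmts re-insert them later.  */
    static void
    redirect_and_discard_pending (edge e, basic_block dest)
    {
      redirect_edge_and_branch_force (e, dest);
      PENDING_STMT (e) = NULL;
    }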
 
@@ -918,20 +905,29 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 
 static edge
 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
-                       basic_block dom_bb)
+                      basic_block dom_bb)
 {
   block_stmt_iterator bsi;
   edge new_e, enter_e;
   tree cond_stmt;
+  tree gimplify_stmt_list;
 
   enter_e = EDGE_SUCC (guard_bb, 0);
   enter_e->flags &= ~EDGE_FALLTHRU;
   enter_e->flags |= EDGE_FALSE_VALUE;
   bsi = bsi_last (guard_bb);
 
+  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
                      NULL_TREE, NULL_TREE);
+  if (gimplify_stmt_list)
+    bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
+
+  bsi = bsi_last (guard_bb);
   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+
   /* Add new edge to connect guard block to the merge/loop-exit block.  */
   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
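
With the change above, slpeel_add_loop_guard no longer requires COND to
already be in GIMPLE form: the condition is gimplified first, the statements
produced by gimplification are emitted into the guard block, and only then is
the COND_EXPR itself appended.  A minimal sketch of that ordering, assuming
GCC 4.3-era tree-SSA APIs (the helper name is illustrative only):

    static void
    emit_gimplified_cond (basic_block bb, tree cond)
    {
      tree stmt_list = NULL_TREE;
      block_stmt_iterator bsi = bsi_last (bb);

      /* Reduce COND to a GIMPLE value; statements needed to compute it
         are collected in STMT_LIST.  */
      cond = force_gimple_operand (cond, &stmt_list, true, NULL_TREE);
      if (stmt_list)
        bsi_insert_after (&bsi, stmt_list, BSI_NEW_STMT);

      /* Re-fetch the iterator so the COND_EXPR follows those statements.  */
      bsi = bsi_last (bb);
      bsi_insert_after (&bsi,
                        build3 (COND_EXPR, void_type_node, cond,
                                NULL_TREE, NULL_TREE),
                        BSI_NEW_STMT);
    }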
@@ -1007,12 +1003,89 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 }
 #endif
 
+/* If the run time cost model check determines that vectorization is
+   not profitable and hence a scalar loop should be generated, then set
+   FIRST_NITERS to the number of prologue peeled iterations.  This allows
+   all the iterations to be executed in the prologue peeled scalar loop.  */
+
+void
+set_prologue_iterations (basic_block bb_before_first_loop,
+                        tree first_niters,
+                        struct loop *loop,
+                        unsigned int th)
+{
+  edge e;
+  basic_block cond_bb, then_bb;
+  tree var, prologue_after_cost_adjust_name, stmt;
+  block_stmt_iterator bsi;
+  tree newphi;
+  edge e_true, e_false, e_fallthru;
+  tree cond_stmt;
+  tree gimplify_stmt_list;
+  tree cost_pre_condition = NULL_TREE;
+  tree scalar_loop_iters = 
+    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
+
+  e = single_pred_edge (bb_before_first_loop);
+  cond_bb = split_edge (e);
+
+  e = single_pred_edge (bb_before_first_loop);
+  then_bb = split_edge (e);
+  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
+
+  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
+                                  EDGE_FALSE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
+
+  e_true = EDGE_PRED (then_bb, 0);
+  e_true->flags &= ~EDGE_FALLTHRU;
+  e_true->flags |= EDGE_TRUE_VALUE;
+
+  e_fallthru = EDGE_SUCC (then_bb, 0);
+
+  cost_pre_condition =
+    build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, 
+           build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+  cost_pre_condition =
+    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
+                         true, NULL_TREE);
+  cond_stmt = build3 (COND_EXPR, void_type_node, cost_pre_condition,
+                     NULL_TREE, NULL_TREE);
+
+  bsi = bsi_last (cond_bb);
+  if (gimplify_stmt_list)
+    bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
+
+  bsi = bsi_last (cond_bb);
+  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+
+  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
+                       "prologue_after_cost_adjust");
+  add_referenced_var (var);
+  prologue_after_cost_adjust_name = 
+    force_gimple_operand (scalar_loop_iters, &stmt, false, var);
+
+  bsi = bsi_last (then_bb);
+  if (stmt)
+    bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
+
+  newphi = create_phi_node (var, bb_before_first_loop);
+  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
+  add_phi_arg (newphi, first_niters, e_false);
+
+  first_niters = PHI_RESULT (newphi);
+}
+
+
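
At the source level, the control flow set_prologue_iterations creates amounts
to a guarded assignment merged through a PHI node.  A plain-C sketch of the
decision it encodes, with illustrative names rather than GCC internals:

    /* TH is the cost model threshold: if the scalar loop runs at most TH
       iterations, vectorization is not profitable and every iteration is
       kept in the peeled scalar prologue.  */
    static unsigned int
    prologue_niters (unsigned int scalar_loop_iters, unsigned int th,
                     unsigned int first_niters)
    {
      if (scalar_loop_iters <= th)
        return scalar_loop_iters;   /* run the whole loop in the prologue */
      return first_niters;          /* usual peeling amount */
    }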
 /* Function slpeel_tree_peel_loop_to_edge.
 
    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
    that is placed on the entry (exit) edge E of LOOP. After this transformation
    we have two loops one after the other - first-loop iterates FIRST_NITERS
    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
+   If the cost model indicates that it is more profitable to execute the
+   scalar loop than the vector one, then the prolog (epilog) loop will iterate
+   for all of the unchanged scalar iterations of the loop.
 
    Input:
    - LOOP: the loop to be peeled.
@@ -1027,6 +1100,13 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop,
         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
         is false, the caller of this function may want to take care of this
         (this can be useful if we don't want new stmts added to first-loop).
+   - TH: cost model profitability threshold: vectorization is considered
+         profitable only when the number of scalar iterations exceeds TH.
+   - CHECK_PROFITABILITY: specify whether the cost model check has not
+                          occurred during versioning and hence needs to
+                          occur during prologue generation, or whether it
+                          has not occurred during prologue generation and
+                          hence needs to occur during epilogue generation.
+
 
    Output:
    The function returns a pointer to the new loop-copy, or NULL if it failed
@@ -1048,11 +1128,11 @@ struct loop*
 slpeel_tree_peel_loop_to_edge (struct loop *loop, 
                               edge e, tree first_niters, 
                               tree niters, bool update_first_loop_count,
-                              unsigned int th)
+                              unsigned int th, bool check_profitability)
 {
   struct loop *new_loop = NULL, *first_loop, *second_loop;
   edge skip_e;
-  tree pre_condition;
+  tree pre_condition = NULL_TREE;
   bitmap definitions;
   basic_block bb_before_second_loop, bb_after_second_loop;
   basic_block bb_before_first_loop;
@@ -1060,6 +1140,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   basic_block new_exit_bb;
   edge exit_e = single_exit (loop);
   LOC loop_loc;
+  tree cost_pre_condition = NULL_TREE;
   
   if (!slpeel_can_duplicate_loop_p (loop, e))
     return NULL;
@@ -1116,32 +1197,124 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   rename_variables_in_loop (new_loop);
 
 
-  /* 2. Add the guard that controls whether the first loop is executed.
-        Resulting CFG would be:
+  /* 2.  Add the guard code in one of the following ways:
 
-        bb_before_first_loop:
-        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
-                               GOTO first-loop
+     2.a Add the guard that controls whether the first loop is executed.
+         This occurs when this function is invoked for prologue or epilogue
+         generation and when the cost model check can be done at compile time.
 
-        first_loop:
-        do {
-        } while ...
+         Resulting CFG would be:
 
-        bb_before_second_loop:
+         bb_before_first_loop:
+         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
 
-        second_loop:
-        do {
-        } while ...
+         first_loop:
+         do {
+         } while ...
 
-        orig_exit_bb:
-   */
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+
+     2.b Add the cost model check that allows the prologue
+         to iterate for the entire unchanged scalar
+         iterations of the loop in the event that the cost
+         model indicates that the scalar loop is more
+         profitable than the vector one. This occurs when
+        this function is invoked for prologue generation
+        and the cost model check needs to be done at run
+        time.
+
+         Resulting CFG after prologue peeling would be:
+
+         if (scalar_loop_iterations <= th)
+           FIRST_NITERS = scalar_loop_iterations
+
+         bb_before_first_loop:
+         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
+
+         first_loop:
+         do {
+         } while ...
+
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+
+     2.c Add the cost model check that allows the epilogue
+         to iterate for the entire unchanged scalar
+         iterations of the loop in the event that the cost
+         model indicates that the scalar loop is more
+         profitable than the vector one. This occurs when
+        this function is invoked for epilogue generation
+        and the cost model check needs to be done at run
+        time.
+
+         Resulting CFG after epilogue peeling would be:
+
+         bb_before_first_loop:
+         if ((scalar_loop_iterations <= th)
+             ||
+             FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
+
+         first_loop:
+         do {
+         } while ...
+
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+  */
 
   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
   bb_before_second_loop = split_edge (single_exit (first_loop));
 
-  pre_condition =
-    fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
-       build_int_cst (TREE_TYPE (first_niters), th));
+  /* Epilogue peeling.  */
+  if (!update_first_loop_count)
+    {
+      pre_condition =
+       fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
+                    build_int_cst (TREE_TYPE (first_niters), 0));
+      if (check_profitability)
+       {
+         tree scalar_loop_iters
+           = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
+                                       (loop_vec_info_for_loop (loop)));
+         cost_pre_condition = 
+           build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, 
+                   build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+
+         pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+                                      cost_pre_condition, pre_condition);
+       }
+    }
+
+  /* Prologue peeling.  */  
+  else
+    {
+      if (check_profitability)
+       set_prologue_iterations (bb_before_first_loop, first_niters,
+                                loop, th);
+
+      pre_condition =
+       fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
+                    build_int_cst (TREE_TYPE (first_niters), 0));
+    }
 
   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                   bb_before_second_loop, bb_before_first_loop);
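
Taken together, the two branches above give the guards the following
source-level shape; this is a plain-C sketch with illustrative names, matching
the CFG comments in cases 2.a-2.c rather than GCC internals:

    /* Return nonzero when the first loop should be skipped entirely.  */
    static int
    skip_first_loop (int epilogue_peeling, int check_profitability,
                     unsigned int first_niters,
                     unsigned int scalar_loop_iters, unsigned int th)
    {
      if (epilogue_peeling)
        /* One combined guard: an unprofitable loop skips the vector
           (first) loop and runs all iterations in the scalar epilogue.  */
        return (check_profitability && scalar_loop_iters <= th)
               || first_niters == 0;

      /* Prologue peeling: the runtime cost check was already folded into
         FIRST_NITERS by set_prologue_iterations, so only the zero test
         remains.  */
      return first_niters == 0;
    }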
@@ -1359,6 +1532,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
   STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
   STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
+  STMT_SLP_TYPE (res) = 0;
   DR_GROUP_FIRST_DR (res) = NULL_TREE;
   DR_GROUP_NEXT_DR (res) = NULL_TREE;
   DR_GROUP_SIZE (res) = 0;
@@ -1371,6 +1545,22 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
 }
 
 
+/* Free stmt vectorization related info.  */
+
+void
+free_stmt_vec_info (tree stmt)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+  if (!stmt_info)
+    return;
+
+  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
+  free (stmt_info);
+  set_stmt_info (stmt_ann (stmt), NULL);
+}
+
+
 /* Function bb_in_loop_p
 
    Used as predicate for dfs order traversal of the loop bbs.  */
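
free_stmt_vec_info centralizes cleanup that destroy_loop_vec_info previously
did inline for PHI nodes and ordinary statements (see the hunks further down).
A usage sketch mirroring that caller, assuming GCC 4.3-era statement iterators:

    /* Release per-statement vectorizer data for every stmt in BB.  */
    static void
    free_bb_stmt_vec_infos (basic_block bb)
    {
      tree phi;
      block_stmt_iterator si;

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        free_stmt_vec_info (phi);

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        free_stmt_vec_info (bsi_stmt (si));
    }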
@@ -1467,6 +1657,7 @@ new_loop_vec_info (struct loop *loop)
 
   LOOP_VINFO_BBS (res) = bbs;
   LOOP_VINFO_NITERS (res) = NULL;
+  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
@@ -1478,7 +1669,9 @@ new_loop_vec_info (struct loop *loop)
     VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
   LOOP_VINFO_MAY_ALIAS_DDRS (res) =
     VEC_alloc (ddr_p, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
-
+  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (tree, heap, 10);
+  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
+  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
 
   return res;
 }
@@ -1497,6 +1690,8 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
   int nbbs;
   block_stmt_iterator si;
   int j;
+  VEC (slp_instance, heap) *slp_instances;
+  slp_instance instance;
 
   if (!loop_vinfo)
     return;
@@ -1522,21 +1717,13 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
     {
       basic_block bb = bbs[j];
       tree phi;
-      stmt_vec_info stmt_info;
 
       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
-        {
-          stmt_ann_t ann = stmt_ann (phi);
-
-          stmt_info = vinfo_for_stmt (phi);
-          free (stmt_info);
-          set_stmt_info (ann, NULL);
-        }
+        free_stmt_vec_info (phi);
 
       for (si = bsi_start (bb); !bsi_end_p (si); )
        {
          tree stmt = bsi_stmt (si);
-         stmt_ann_t ann = stmt_ann (stmt);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
          if (stmt_info)
@@ -1554,9 +1741,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
                }
                        
              /* Free stmt_vec_info.  */
-             VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
-             free (stmt_info);
-             set_stmt_info (ann, NULL);
+             free_stmt_vec_info (stmt);
 
              /* Remove dead "pattern stmts".  */
              if (remove_stmt_p)
@@ -1571,6 +1756,11 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
   VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
   VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
+  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+  for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
+    vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
+  VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+  VEC_free (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
 
   free (loop_vinfo);
   loop->aux = NULL;
@@ -1597,12 +1787,9 @@ vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
   if (TREE_STATIC (decl))
     return (alignment <= MAX_OFILE_ALIGNMENT);
   else
-    /* This is not 100% correct.  The absolute correct stack alignment
-       is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
-       PREFERRED_STACK_BOUNDARY is honored by all translation units.
-       However, until someone implements forced stack alignment, SSE
-       isn't really usable without this.  */  
-    return (alignment <= PREFERRED_STACK_BOUNDARY); 
+    /* This used to be PREFERRED_STACK_BOUNDARY; however, that is not 100%
+       correct until someone implements forced stack alignment.  */
+    return (alignment <= STACK_BOUNDARY); 
 }
 
 
@@ -1809,7 +1996,13 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
       *dt = vect_invariant_def;
       return true;
    }
-    
+
+  if (TREE_CODE (operand) == PAREN_EXPR)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "non-associatable copy.");
+      operand = TREE_OPERAND (operand, 0);
+    }
   if (TREE_CODE (operand) != SSA_NAME)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -2149,7 +2342,7 @@ reduction_code_for_scalar_code (enum tree_code code,
 
 /* Function vect_is_simple_reduction
 
-   Detect a cross-iteration def-use cucle that represents a simple
+   Detect a cross-iteration def-use cycle that represents a simple
    reduction computation. We look for the following pattern:
 
    loop_header:
@@ -2574,8 +2767,10 @@ gate_increase_alignment (void)
   return flag_section_anchors && flag_tree_vectorize;
 }
 
-struct tree_opt_pass pass_ipa_increase_alignment = 
+struct simple_ipa_opt_pass pass_ipa_increase_alignment = 
 {
+ {
+  SIMPLE_IPA_PASS,
   "increase_alignment",                        /* name */
   gate_increase_alignment,             /* gate */
   increase_alignment,                  /* execute */
@@ -2587,6 +2782,6 @@ struct tree_opt_pass pass_ipa_increase_alignment =
   0,                                   /* properties_provided */
   0,                                   /* properties_destroyed */
   0,                                   /* todo_flags_start */
-  0,                                   /* todo_flags_finish */
-  0                                    /* letter */
+  0                                    /* todo_flags_finish */
+ }
 };
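
The last two hunks convert pass_ipa_increase_alignment from a plain
tree_opt_pass into a simple_ipa_opt_pass, whose first member is an embedded
generic pass descriptor; that is why the initializer gains an extra brace
level, starts with the SIMPLE_IPA_PASS type tag, and drops the old trailing
"letter" field.  Roughly, and assuming the GCC 4.4-era pass manager layout:

    /* The wrapper adds no fields of its own beyond the embedded pass.  */
    struct simple_ipa_opt_pass
    {
      struct opt_pass pass;
    };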