pf3gnuchains/gcc-fork.git: gcc/tree-vectorizer.c

diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index c239d29..ac3f843 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -147,24 +147,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-pass.h"
 
 /*************************************************************************
-  Simple Loop Peeling Utilities
- *************************************************************************/
-static void slpeel_update_phis_for_duplicate_loop 
-  (struct loop *, struct loop *, bool after);
-static void slpeel_update_phi_nodes_for_guard1 
-  (edge, struct loop *, bool, basic_block *, bitmap *); 
-static void slpeel_update_phi_nodes_for_guard2 
-  (edge, struct loop *, bool, basic_block *);
-static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
-
-static void rename_use_op (use_operand_p);
-static void rename_variables_in_bb (basic_block);
-static void rename_variables_in_loop (struct loop *);
-
-/*************************************************************************
   General Vectorization Utilities
  *************************************************************************/
-static void vect_set_dump_settings (void);
 
 /* vect_dump will be set to stderr or to dump_file if it exists.  */
 FILE *vect_dump;
@@ -241,7 +225,7 @@ rename_variables_in_bb (basic_block bb)
 
 /* Renames variables in new generated LOOP.  */
 
-static void
+void
 rename_variables_in_loop (struct loop *loop)
 {
   unsigned i;
@@ -806,7 +790,7 @@ slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
 /* Given LOOP this function generates a new copy of it and puts it 
    on E which is either the entry or exit of LOOP.  */
 
-static struct loop *
+struct loop *
 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 {
   struct loop *new_loop;
@@ -871,6 +855,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
   if (at_exit) /* Add the loop copy at exit.  */
     {
       redirect_edge_and_branch_force (e, new_loop->header);
+      PENDING_STMT (e) = NULL;
       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
       if (was_imm_dom)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
@@ -888,6 +873,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
        new_exit_e = EDGE_SUCC (new_loop->header, 1); 
 
       redirect_edge_and_branch_force (new_exit_e, loop->header);
+      PENDING_STMT (new_exit_e) = NULL;
       set_immediate_dominator (CDI_DOMINATORS, loop->header,
                               new_exit_e->src);
 
@@ -901,6 +887,7 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
        }    
 
       redirect_edge_and_branch_force (entry_e, new_loop->header);
+      PENDING_STMT (entry_e) = NULL;
       set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
     }
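
The three PENDING_STMT hunks above all follow the same pattern: after an edge
is forced to a new destination, any PHI arguments the SSA edge-redirection
machinery queued on that edge are dropped, since the duplicated loop builds
its own PHI nodes afterwards.  A minimal sketch of the pattern, assuming GCC
4.3-era CFG/SSA internals (the helper name is illustrative only):

    /* Redirect E to DEST and discard the PHI arguments queued on the edge,
       instead of letting flush_pending_stmts re-insert them later.  */
    static void
    redirect_and_discard_pending (edge e, basic_block dest)
    {
      redirect_edge_and_branch_force (e, dest);
      PENDING_STMT (e) = NULL;
    }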
 
@@ -918,20 +905,29 @@ slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
 
 static edge
 slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
-                       basic_block dom_bb)
+                      basic_block dom_bb)
 {
   block_stmt_iterator bsi;
   edge new_e, enter_e;
   tree cond_stmt;
+  tree gimplify_stmt_list;
 
   enter_e = EDGE_SUCC (guard_bb, 0);
   enter_e->flags &= ~EDGE_FALLTHRU;
   enter_e->flags |= EDGE_FALSE_VALUE;
   bsi = bsi_last (guard_bb);
 
+  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
   cond_stmt = build3 (COND_EXPR, void_type_node, cond,
                      NULL_TREE, NULL_TREE);
+  if (gimplify_stmt_list)
+    bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
+
+  bsi = bsi_last (guard_bb);
   bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+
   /* Add new edge to connect guard block to the merge/loop-exit block.  */
   new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
   set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
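
With the change above, slpeel_add_loop_guard no longer requires COND to
already be in GIMPLE form: the condition is gimplified first, the statements
produced by gimplification are emitted into the guard block, and only then is
the COND_EXPR itself appended.  A minimal sketch of that ordering, assuming
GCC 4.3-era tree-SSA APIs (the helper name is illustrative only):

    static void
    emit_gimplified_cond (basic_block bb, tree cond)
    {
      tree stmt_list = NULL_TREE;
      block_stmt_iterator bsi = bsi_last (bb);

      /* Reduce COND to a GIMPLE value; statements needed to compute it
         are collected in STMT_LIST.  */
      cond = force_gimple_operand (cond, &stmt_list, true, NULL_TREE);
      if (stmt_list)
        bsi_insert_after (&bsi, stmt_list, BSI_NEW_STMT);

      /* Re-fetch the iterator so the COND_EXPR follows those statements.  */
      bsi = bsi_last (bb);
      bsi_insert_after (&bsi,
                        build3 (COND_EXPR, void_type_node, cond,
                                NULL_TREE, NULL_TREE),
                        BSI_NEW_STMT);
    }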
@@ -1007,12 +1003,89 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop,
 }
 #endif
 
+/* If the run time cost model check determines that vectorization is
+   not profitable and hence a scalar loop should be generated, then set
+   FIRST_NITERS to the number of prologue peeled iterations.  This allows
+   all the iterations to be executed in the prologue peeled scalar loop.  */
+
+void
+set_prologue_iterations (basic_block bb_before_first_loop,
+                        tree first_niters,
+                        struct loop *loop,
+                        unsigned int th)
+{
+  edge e;
+  basic_block cond_bb, then_bb;
+  tree var, prologue_after_cost_adjust_name, stmt;
+  block_stmt_iterator bsi;
+  tree newphi;
+  edge e_true, e_false, e_fallthru;
+  tree cond_stmt;
+  tree gimplify_stmt_list;
+  tree cost_pre_condition = NULL_TREE;
+  tree scalar_loop_iters = 
+    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
+
+  e = single_pred_edge (bb_before_first_loop);
+  cond_bb = split_edge (e);
+
+  e = single_pred_edge (bb_before_first_loop);
+  then_bb = split_edge (e);
+  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
+
+  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
+                                  EDGE_FALSE_VALUE);
+  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
+
+  e_true = EDGE_PRED (then_bb, 0);
+  e_true->flags &= ~EDGE_FALLTHRU;
+  e_true->flags |= EDGE_TRUE_VALUE;
+
+  e_fallthru = EDGE_SUCC (then_bb, 0);
+
+  cost_pre_condition =
+    build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, 
+           build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+  cost_pre_condition =
+    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
+                         true, NULL_TREE);
+  cond_stmt = build3 (COND_EXPR, void_type_node, cost_pre_condition,
+                     NULL_TREE, NULL_TREE);
+
+  bsi = bsi_last (cond_bb);
+  if (gimplify_stmt_list)
+    bsi_insert_after (&bsi, gimplify_stmt_list, BSI_NEW_STMT);
+
+  bsi = bsi_last (cond_bb);
+  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
+
+  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
+                       "prologue_after_cost_adjust");
+  add_referenced_var (var);
+  prologue_after_cost_adjust_name = 
+    force_gimple_operand (scalar_loop_iters, &stmt, false, var);
+
+  bsi = bsi_last (then_bb);
+  if (stmt)
+    bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
+
+  newphi = create_phi_node (var, bb_before_first_loop);
+  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
+  add_phi_arg (newphi, first_niters, e_false);
+
+  first_niters = PHI_RESULT (newphi);
+}
+
+
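
At the source level, the control flow set_prologue_iterations creates amounts
to a guarded assignment merged through a PHI node.  A plain-C sketch of the
decision it encodes, with illustrative names rather than GCC internals:

    /* TH is the cost model threshold: if the scalar loop runs at most TH
       iterations, vectorization is not profitable and every iteration is
       kept in the peeled scalar prologue.  */
    static unsigned int
    prologue_niters (unsigned int scalar_loop_iters, unsigned int th,
                     unsigned int first_niters)
    {
      if (scalar_loop_iters <= th)
        return scalar_loop_iters;   /* run the whole loop in the prologue */
      return first_niters;          /* usual peeling amount */
    }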
 /* Function slpeel_tree_peel_loop_to_edge.
 
    Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
    that is placed on the entry (exit) edge E of LOOP. After this transformation
    we have two loops one after the other - first-loop iterates FIRST_NITERS
    times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
+   If the cost model indicates that it is more profitable to execute the
+   scalar loop than the vector one, then the prolog (epilog) loop will iterate
+   for all of the unchanged scalar iterations of the loop.
 
    Input:
    - LOOP: the loop to be peeled.
@@ -1027,6 +1100,13 @@ slpeel_verify_cfg_after_peeling (struct loop *first_loop,
         for updating the loop bound of the first-loop to FIRST_NITERS.  If it
         is false, the caller of this function may want to take care of this
         (this can be useful if we don't want new stmts added to first-loop).
+   - TH: cost model profitability threshold: vectorization is considered
+         profitable only when the number of scalar iterations exceeds TH.
+   - CHECK_PROFITABILITY: specify whether the cost model check has not
+                          occurred during versioning and hence needs to
+                          occur during prologue generation, or whether it
+                          has not occurred during prologue generation and
+                          hence needs to occur during epilogue generation.
+
 
    Output:
    The function returns a pointer to the new loop-copy, or NULL if it failed
@@ -1048,11 +1128,11 @@ struct loop*
 slpeel_tree_peel_loop_to_edge (struct loop *loop, 
                               edge e, tree first_niters, 
                               tree niters, bool update_first_loop_count,
-                              unsigned int th)
+                              unsigned int th, bool check_profitability)
 {
   struct loop *new_loop = NULL, *first_loop, *second_loop;
   edge skip_e;
-  tree pre_condition;
+  tree pre_condition = NULL_TREE;
   bitmap definitions;
   basic_block bb_before_second_loop, bb_after_second_loop;
   basic_block bb_before_first_loop;
@@ -1060,6 +1140,7 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   basic_block new_exit_bb;
   edge exit_e = single_exit (loop);
   LOC loop_loc;
+  tree cost_pre_condition = NULL_TREE;
   
   if (!slpeel_can_duplicate_loop_p (loop, e))
     return NULL;
@@ -1116,32 +1197,124 @@ slpeel_tree_peel_loop_to_edge (struct loop *loop,
   rename_variables_in_loop (new_loop);
 
 
-  /* 2. Add the guard that controls whether the first loop is executed.
-        Resulting CFG would be:
+  /* 2.  Add the guard code in one of the following ways:
 
-        bb_before_first_loop:
-        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
-                               GOTO first-loop
+     2.a Add the guard that controls whether the first loop is executed.
+         This occurs when this function is invoked for prologue or epilogue
+         generation and when the cost model check can be done at compile time.
 
-        first_loop:
-        do {
-        } while ...
+         Resulting CFG would be:
 
-        bb_before_second_loop:
+         bb_before_first_loop:
+         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
 
-        second_loop:
-        do {
-        } while ...
+         first_loop:
+         do {
+         } while ...
 
-        orig_exit_bb:
-   */
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+
+     2.b Add the cost model check that allows the prologue
+         to iterate for the entire unchanged scalar
+         iterations of the loop in the event that the cost
+         model indicates that the scalar loop is more
+         profitable than the vector one. This occurs when
+        this function is invoked for prologue generation
+        and the cost model check needs to be done at run
+        time.
+
+         Resulting CFG after prologue peeling would be:
+
+         if (scalar_loop_iterations <= th)
+           FIRST_NITERS = scalar_loop_iterations
+
+         bb_before_first_loop:
+         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
+
+         first_loop:
+         do {
+         } while ...
+
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+
+     2.c Add the cost model check that allows the epilogue
+         to iterate for the entire unchanged scalar
+         iterations of the loop in the event that the cost
+         model indicates that the scalar loop is more
+         profitable than the vector one. This occurs when
+        this function is invoked for epilogue generation
+        and the cost model check needs to be done at run
+        time.
+
+         Resulting CFG after epilogue peeling would be:
+
+         bb_before_first_loop:
+         if ((scalar_loop_iterations <= th)
+             ||
+             FIRST_NITERS == 0) GOTO bb_before_second_loop
+                                GOTO first-loop
+
+         first_loop:
+         do {
+         } while ...
+
+         bb_before_second_loop:
+
+         second_loop:
+         do {
+         } while ...
+
+         orig_exit_bb:
+  */
 
   bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
   bb_before_second_loop = split_edge (single_exit (first_loop));
 
-  pre_condition =
-    fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
-       build_int_cst (TREE_TYPE (first_niters), th));
+  /* Epilogue peeling.  */
+  if (!update_first_loop_count)
+    {
+      pre_condition =
+       fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
+                    build_int_cst (TREE_TYPE (first_niters), 0));
+      if (check_profitability)
+       {
+         tree scalar_loop_iters
+           = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
+                                       (loop_vec_info_for_loop (loop)));
+         cost_pre_condition = 
+           build2 (LE_EXPR, boolean_type_node, scalar_loop_iters, 
+                   build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+
+         pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
+                                      cost_pre_condition, pre_condition);
+       }
+    }
+
+  /* Prologue peeling.  */  
+  else
+    {
+      if (check_profitability)
+       set_prologue_iterations (bb_before_first_loop, first_niters,
+                                loop, th);
+
+      pre_condition =
+       fold_build2 (LE_EXPR, boolean_type_node, first_niters, 
+                    build_int_cst (TREE_TYPE (first_niters), 0));
+    }
 
   skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                   bb_before_second_loop, bb_before_first_loop);
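
Taken together, the two branches above give the guards the following
source-level shape; this is a plain-C sketch with illustrative names, matching
the CFG comments in cases 2.a-2.c rather than GCC internals:

    /* Return nonzero when the first loop should be skipped entirely.  */
    static int
    skip_first_loop (int epilogue_peeling, int check_profitability,
                     unsigned int first_niters,
                     unsigned int scalar_loop_iters, unsigned int th)
    {
      if (epilogue_peeling)
        /* One combined guard: an unprofitable loop skips the vector
           (first) loop and runs all iterations in the scalar epilogue.  */
        return (check_profitability && scalar_loop_iters <= th)
               || first_niters == 0;

      /* Prologue peeling: the runtime cost check was already folded into
         FIRST_NITERS by set_prologue_iterations, so only the zero test
         remains.  */
      return first_niters == 0;
    }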
@@ -1359,6 +1532,7 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
   STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
   STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
   STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
+  STMT_SLP_TYPE (res) = 0;
   DR_GROUP_FIRST_DR (res) = NULL_TREE;
   DR_GROUP_NEXT_DR (res) = NULL_TREE;
   DR_GROUP_SIZE (res) = 0;
@@ -1371,6 +1545,22 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
 }
 
 
+/* Free stmt vectorization related info.  */
+
+void
+free_stmt_vec_info (tree stmt)
+{
+  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+
+  if (!stmt_info)
+    return;
+
+  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
+  free (stmt_info);
+  set_stmt_info (stmt_ann (stmt), NULL);
+}
+
+
 /* Function bb_in_loop_p
 
    Used as predicate for dfs order traversal of the loop bbs.  */
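
free_stmt_vec_info centralizes cleanup that destroy_loop_vec_info previously
did inline for PHI nodes and ordinary statements (see the hunks further down).
A usage sketch mirroring that caller, assuming GCC 4.3-era statement iterators:

    /* Release per-statement vectorizer data for every stmt in BB.  */
    static void
    free_bb_stmt_vec_infos (basic_block bb)
    {
      tree phi;
      block_stmt_iterator si;

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        free_stmt_vec_info (phi);

      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        free_stmt_vec_info (bsi_stmt (si));
    }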
@@ -1467,6 +1657,7 @@ new_loop_vec_info (struct loop *loop)
 
   LOOP_VINFO_BBS (res) = bbs;
   LOOP_VINFO_NITERS (res) = NULL;
+  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
   LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
   LOOP_VINFO_VECTORIZABLE_P (res) = 0;
   LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
@@ -1478,7 +1669,9 @@ new_loop_vec_info (struct loop *loop)
     VEC_alloc (tree, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
   LOOP_VINFO_MAY_ALIAS_DDRS (res) =
     VEC_alloc (ddr_p, heap, PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
-
+  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (tree, heap, 10);
+  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
+  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
 
   return res;
 }
@@ -1497,6 +1690,8 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
   int nbbs;
   block_stmt_iterator si;
   int j;
+  VEC (slp_instance, heap) *slp_instances;
+  slp_instance instance;
 
   if (!loop_vinfo)
     return;
@@ -1522,21 +1717,13 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
     {
       basic_block bb = bbs[j];
       tree phi;
-      stmt_vec_info stmt_info;
 
       for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
-        {
-          stmt_ann_t ann = stmt_ann (phi);
-
-          stmt_info = vinfo_for_stmt (phi);
-          free (stmt_info);
-          set_stmt_info (ann, NULL);
-        }
+        free_stmt_vec_info (phi);
 
       for (si = bsi_start (bb); !bsi_end_p (si); )
        {
          tree stmt = bsi_stmt (si);
-         stmt_ann_t ann = stmt_ann (stmt);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 
          if (stmt_info)
@@ -1554,9 +1741,7 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
                }
                        
              /* Free stmt_vec_info.  */
-             VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
-             free (stmt_info);
-             set_stmt_info (ann, NULL);
+             free_stmt_vec_info (stmt);
 
              /* Remove dead "pattern stmts".  */
              if (remove_stmt_p)
@@ -1571,6 +1756,11 @@ destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
   free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
   VEC_free (tree, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
   VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
+  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+  for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
+    vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
+  VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
+  VEC_free (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
 
   free (loop_vinfo);
   loop->aux = NULL;
@@ -1597,12 +1787,9 @@ vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
   if (TREE_STATIC (decl))
     return (alignment <= MAX_OFILE_ALIGNMENT);
   else
-    /* This is not 100% correct.  The absolute correct stack alignment
-       is STACK_BOUNDARY.  We're supposed to hope, but not assume, that
-       PREFERRED_STACK_BOUNDARY is honored by all translation units.
-       However, until someone implements forced stack alignment, SSE
-       isn't really usable without this.  */  
-    return (alignment <= PREFERRED_STACK_BOUNDARY); 
+    /* This used to be PREFERRED_STACK_BOUNDARY; however, that is not 100%
+       correct until someone implements forced stack alignment.  */
+    return (alignment <= STACK_BOUNDARY); 
 }
 
 
@@ -1809,7 +1996,13 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
       *dt = vect_invariant_def;
       return true;
    }
-    
+
+  if (TREE_CODE (operand) == PAREN_EXPR)
+    {
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "non-associatable copy.");
+      operand = TREE_OPERAND (operand, 0);
+    }
   if (TREE_CODE (operand) != SSA_NAME)
     {
       if (vect_print_dump_info (REPORT_DETAILS))
@@ -2149,7 +2342,7 @@ reduction_code_for_scalar_code (enum tree_code code,
 
 /* Function vect_is_simple_reduction
 
-   Detect a cross-iteration def-use cucle that represents a simple
+   Detect a cross-iteration def-use cycle that represents a simple
    reduction computation. We look for the following pattern:
 
    loop_header:
@@ -2574,8 +2767,10 @@ gate_increase_alignment (void)
   return flag_section_anchors && flag_tree_vectorize;
 }
 
-struct tree_opt_pass pass_ipa_increase_alignment = 
+struct simple_ipa_opt_pass pass_ipa_increase_alignment = 
 {
+ {
+  SIMPLE_IPA_PASS,
   "increase_alignment",                        /* name */
   gate_increase_alignment,             /* gate */
   increase_alignment,                  /* execute */
@@ -2587,6 +2782,6 @@ struct tree_opt_pass pass_ipa_increase_alignment =
   0,                                   /* properties_provided */
   0,                                   /* properties_destroyed */
   0,                                   /* todo_flags_start */
-  0,                                   /* todo_flags_finish */
-  0                                    /* letter */
+  0                                    /* todo_flags_finish */
+ }
 };
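
The last two hunks convert pass_ipa_increase_alignment from a plain
tree_opt_pass into a simple_ipa_opt_pass, whose first member is an embedded
generic pass descriptor; that is why the initializer gains an extra brace
level, starts with the SIMPLE_IPA_PASS type tag, and drops the old trailing
"letter" field.  Roughly, and assuming the GCC 4.4-era pass manager layout:

    /* The wrapper adds no fields of its own beyond the embedded pass.  */
    struct simple_ipa_opt_pass
    {
      struct opt_pass pass;
    };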