/* Loop Vectorization
- Copyright (C) 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
This file is part of GCC.
/*************************************************************************
Simple Loop Peeling Utilities
*************************************************************************/
-static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
- (struct loop *, struct loops *, edge);
static void slpeel_update_phis_for_duplicate_loop
(struct loop *, struct loop *, bool after);
static void slpeel_update_phi_nodes_for_guard1
to mark that it's uninitialized. */
enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
-/* Number of loops, at the beginning of vectorization. */
-unsigned int vect_loops_num;
-
/* Loop location. */
static LOC vect_loop_location;
/* Bitmap of virtual variables to be renamed. */
-bitmap vect_vnames_to_rename;
+bitmap vect_memsyms_to_rename;
\f
/*************************************************************************
Simple Loop Peeling Utilities
for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
{
stmt = bsi_stmt (bsi);
- FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
- (SSA_OP_ALL_USES | SSA_OP_ALL_KILLS))
+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
rename_use_op (use_p);
}
renaming later. */
name = PHI_RESULT (orig_phi);
if (!is_gimple_reg (SSA_NAME_VAR (name)))
- bitmap_set_bit (vect_vnames_to_rename, SSA_NAME_VERSION (name));
+ bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));
/** 1. Handle new-merge-point phis **/
/** 2. Handle loop-closed-ssa-form phis **/
+ if (!is_gimple_reg (PHI_RESULT (orig_phi)))
+ continue;
+
/* 2.1. Generate new phi node in NEW_EXIT_BB: */
new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
*new_exit_bb);
on E which is either the entry or exit of LOOP. */
static struct loop *
-slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
- edge e)
+slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
{
struct loop *new_loop;
basic_block *new_bbs, *bbs;
}
/* Generate new loop structure. */
- new_loop = duplicate_loop (loops, loop, loop->outer);
+ new_loop = duplicate_loop (loop, loop->outer);
if (!new_loop)
{
free (bbs);
copy_bbs (bbs, loop->num_nodes, new_bbs,
&exit, 1, &new_exit, NULL,
e->src);
- set_single_exit (new_loop, new_exit);
/* Duplicate the phi args at the exit bbs, marking them as coming
   also from the exit of the duplicated loop.  */
*/
struct loop*
-slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
+slpeel_tree_peel_loop_to_edge (struct loop *loop,
edge e, tree first_niters,
- tree niters, bool update_first_loop_count)
+ tree niters, bool update_first_loop_count,
+ unsigned int th)
{
struct loop *new_loop = NULL, *first_loop, *second_loop;
edge skip_e;
orig_exit_bb:
*/
- if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
+ if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
{
loop_loc = find_loop_location (loop);
if (dump_file && (dump_flags & TDF_DETAILS))
pre_condition =
fold_build2 (LE_EXPR, boolean_type_node, first_niters,
- build_int_cst (TREE_TYPE (first_niters), 0));
+ build_int_cst (TREE_TYPE (first_niters), th));
+
skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
bb_before_second_loop, bb_before_first_loop);
slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
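
A sketch of the control flow the guard builds, for orientation only:
PRE_CONDITION now compares FIRST_NITERS against the cost-model
threshold TH instead of zero, so the first (e.g. vectorized) loop is
skipped when too few iterations remain to make it profitable.  Labels
below are illustrative, not names from the patch:

  if (first_niters <= th)
    goto second_loop;   /* too few iterations: skip the first loop */
  first_loop:           /* runs FIRST_NITERS iterations */
    ...
  second_loop:          /* runs the remaining NITERS - FIRST_NITERS */
    ...
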
node = get_loop_exit_condition (loop);
- if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node)
+ if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node)
&& EXPR_FILENAME (node) && EXPR_LINENO (node))
return EXPR_LOC (node);
for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
{
node = bsi_stmt (si);
- if (node && EXPR_P (node) && EXPR_HAS_LOCATION (node))
+ if (node && CAN_HAVE_LOCATION_P (node) && EXPR_HAS_LOCATION (node))
return EXPR_LOC (node);
}
else
STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
+ DR_GROUP_FIRST_DR (res) = NULL_TREE;
+ DR_GROUP_NEXT_DR (res) = NULL_TREE;
+ DR_GROUP_SIZE (res) = 0;
+ DR_GROUP_STORE_COUNT (res) = 0;
+ DR_GROUP_GAP (res) = 0;
+ DR_GROUP_SAME_DR_STMT (res) = NULL_TREE;
+ DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
return res;
}
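
The DR_GROUP_* fields initialized above describe a group of interleaved
data references.  A hedged sketch of walking such a group from its
first member (STMT_INFO and MEMBER are assumed, illustrative names):

  tree member;

  /* Visit each scalar stmt in the interleaving group; DR_GROUP_SIZE
     gives the member count, DR_GROUP_GAP the skipped elements.  */
  for (member = DR_GROUP_FIRST_DR (stmt_info);
       member;
       member = DR_GROUP_NEXT_DR (vinfo_for_stmt (member)))
    {
      /* ... process MEMBER ...  */
    }
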
}
/* An empty stmt is expected only in the case of a function argument.
- (Otherwise - we expect a phi_node or a modify_expr). */
+ (Otherwise - we expect a phi_node or a GIMPLE_MODIFY_STMT). */
if (IS_EMPTY_STMT (*def_stmt))
{
tree arg = TREE_OPERAND (*def_stmt, 0);
|| *dt == vect_invariant_def);
break;
- case MODIFY_EXPR:
- *def = TREE_OPERAND (*def_stmt, 0);
+ case GIMPLE_MODIFY_STMT:
+ *def = GIMPLE_STMT_OPERAND (*def_stmt, 0);
gcc_assert (*dt == vect_loop_def || *dt == vect_invariant_def);
break;
return false;
}
- if (*dt == vect_induction_def)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "induction not supported.");
- return false;
- }
-
return true;
}
vector form (i.e., when operating on arguments of type VECTYPE).
The two kinds of widening operations we currently support are
- NOP and WIDEN_MULT. This function checks if these oprations
+ NOP and WIDEN_MULT. This function checks if these operations
are supported by the target platform either directly (via vector
tree-codes), or via target builtins.
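
As a hedged source-level example (function and parameter names are
illustrative), a loop whose multiply widens char operands to int is the
kind of computation the WIDEN_MULT support covers:

  void
  widen_mult_example (int *res, unsigned char *a, unsigned char *b, int n)
  {
    int i;

    /* Each multiply widens its operands from char to int, so
       vectorizing it needs the widening support checked here.  */
    for (i = 0; i < n; i++)
      res[i] = (int) a[i] * (int) b[i];
  }
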
enum machine_mode vec_mode;
enum insn_code icode1, icode2;
optab optab1, optab2;
- tree expr = TREE_OPERAND (stmt, 1);
+ tree expr = GIMPLE_STMT_OPERAND (stmt, 1);
tree type = TREE_TYPE (expr);
tree wide_vectype = get_vectype_for_scalar_type (type);
enum tree_code c1, c2;
vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
However, in the special case that the result of the widening operation is
- used in a reduction copmutation only, the order doesn't matter (because
+ used in a reduction computation only, the order doesn't matter (because
when vectorizing a reduction we change the order of the computation).
- Some targets can take advatage of this and generate more efficient code.
+ Some targets can take advantage of this and generate more efficient code.
For example, targets like Altivec, that support widen_mult using a sequence
of {mult_even,mult_odd} generate the following vectors:
vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. */
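
A hedged scalar illustration of the order-invariance claim: for an
associative, commutative reduction such as integer addition, the
in-order and even/odd-interleaved combinations agree.

  static int
  reduce_any_order (int r1, int r2, int r3, int r4,
                    int r5, int r6, int r7, int r8)
  {
    /* In-order lanes: [r1,r2,r3,r4] and [r5,r6,r7,r8].  */
    int in_order = ((r1 + r2) + (r3 + r4)) + ((r5 + r6) + (r7 + r8));
    /* Even/odd lanes: [r1,r3,r5,r7] and [r2,r4,r6,r8].  */
    int even_odd = ((r1 + r3) + (r5 + r7)) + ((r2 + r4) + (r6 + r8));

    /* Equal for any associative, commutative operation.  */
    gcc_assert (in_order == even_odd);
    return in_order;
  }
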
int op_type;
tree operation, op1, op2;
tree type;
+ int nloop_uses;
+ tree name;
+ imm_use_iterator imm_iter;
+ use_operand_p use_p;
- if (TREE_CODE (loop_arg) != SSA_NAME)
+ name = PHI_RESULT (phi);
+ nloop_uses = 0;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
{
- if (vect_print_dump_info (REPORT_DETAILS))
+ tree use_stmt = USE_STMT (use_p);
+ if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
+ && vinfo_for_stmt (use_stmt)
+ && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
+ nloop_uses++;
+ if (nloop_uses > 1)
{
- fprintf (vect_dump, "reduction: not ssa_name: ");
- print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "reduction used in loop.");
+ return NULL_TREE;
}
+ }
+
+ if (TREE_CODE (loop_arg) != SSA_NAME)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "reduction: not ssa_name: ");
+ print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
+ }
return NULL_TREE;
}
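
A hedged source-level example of what the new use count rejects (names
illustrative): SUM is a reduction candidate, but its value is also
stored inside the loop, so it has more than one in-loop use and
nloop_uses exceeds one.

  int
  partial_sums (int *a, int *b, int n)
  {
    int i, sum = 0;

    for (i = 0; i < n; i++)
      {
        sum += a[i];
        b[i] = sum;   /* second in-loop use: not a simple reduction */
      }
    return sum;
  }
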
if (!def_stmt)
{
if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "reduction: no def_stmt.");
+ fprintf (vect_dump, "reduction: no def_stmt.");
return NULL_TREE;
}
- if (TREE_CODE (def_stmt) != MODIFY_EXPR)
+ if (TREE_CODE (def_stmt) != GIMPLE_MODIFY_STMT)
{
if (vect_print_dump_info (REPORT_DETAILS))
- {
- print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
- }
+ print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
return NULL_TREE;
}
- operation = TREE_OPERAND (def_stmt, 1);
+ name = GIMPLE_STMT_OPERAND (def_stmt, 0);
+ nloop_uses = 0;
+ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
+ {
+ tree use_stmt = USE_STMT (use_p);
+ if (flow_bb_inside_loop_p (loop, bb_for_stmt (use_stmt))
+ && vinfo_for_stmt (use_stmt)
+ && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
+ nloop_uses++;
+ if (nloop_uses > 1)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "reduction used in loop.");
+ return NULL_TREE;
+ }
+ }
+
+ operation = GIMPLE_STMT_OPERAND (def_stmt, 1);
code = TREE_CODE (operation);
if (!commutative_tree_code (code) || !associative_tree_code (code))
{
return NULL_TREE;
}
- op_type = TREE_CODE_LENGTH (code);
+ op_type = TREE_OPERAND_LENGTH (operation);
if (op_type != binary_op)
{
if (vect_print_dump_info (REPORT_DETAILS))
}
return NULL_TREE;
}
- else if (INTEGRAL_TYPE_P (type) && !TYPE_UNSIGNED (type) && flag_trapv)
+ else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type))
{
/* Changing the order of operations changes the semantics. */
if (vect_print_dump_info (REPORT_DETAILS))
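
A hedged illustration of why the reduction is rejected when signed
overflow traps (now keyed off TYPE_OVERFLOW_TRAPS rather than
flag_trapv directly): with a == INT_MAX, b == 1, c == -1, one
association traps and the other does not, so reordering the reduction
changes the semantics.

  int
  reassoc_traps (int a, int b, int c)
  {
    int t1 = (a + b) + c;   /* a + b overflows: traps under -ftrapv */
    int t2 = a + (b + c);   /* b + c == 0: never overflows */
    return t1 == t2;
  }
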
*/
def1 = SSA_NAME_DEF_STMT (op1);
def2 = SSA_NAME_DEF_STMT (op2);
- if (!def1 || !def2)
+ if (!def1 || !def2 || IS_EMPTY_STMT (def1) || IS_EMPTY_STMT (def2))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
return NULL_TREE;
}
- if (TREE_CODE (def1) == MODIFY_EXPR
+
+ /* Check that one def is the reduction def, defined by PHI,
+ the other def is either defined in the loop by a GIMPLE_MODIFY_STMT,
+ or it's an induction (defined by some phi node). */
+
+ if (def2 == phi
&& flow_bb_inside_loop_p (loop, bb_for_stmt (def1))
- && def2 == phi)
+ && (TREE_CODE (def1) == GIMPLE_MODIFY_STMT
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def))
{
if (vect_print_dump_info (REPORT_DETAILS))
{
}
return def_stmt;
}
- else if (TREE_CODE (def2) == MODIFY_EXPR
- && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
- && def1 == phi)
+ else if (def1 == phi
+ && flow_bb_inside_loop_p (loop, bb_for_stmt (def2))
+ && (TREE_CODE (def2) == GIMPLE_MODIFY_STMT
+ || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def))
{
/* Swap operands (just for simplicity - so that the rest of the code
can assume that the reduction variable is always the last (second)
{
tree init_expr;
tree step_expr;
-
tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
/* When there is no evolution in this loop, the evolution function
return false;
step_expr = evolution_part;
- init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
- loop_nb));
+ init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
if (vect_print_dump_info (REPORT_DETAILS))
{
*step = step_expr;
if (TREE_CODE (step_expr) != INTEGER_CST)
- {
+ {
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "step unknown.");
return false;
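
For reference, a hedged example of the chrec decomposition used here:
for the loop below (assumed to be loop number 1), the access function
of I is {3, +, 2}_1, so INIT_EXPR is 3 and STEP_EXPR is the INTEGER_CST
2, which passes the check above.

  /* Illustrative:

       for (i = 3; i < n; i += 2)
         ...

     access_fn (i) = {3, +, 2}_1
     init_expr     = 3   (initial_condition_in_loop_num)
     step_expr     = 2   (evolution_part_in_loop_num)

     A step that varies inside the loop would not be an INTEGER_CST
     and would be rejected as "step unknown".  */
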
Entry Point to loop vectorization phase. */
unsigned
-vectorize_loops (struct loops *loops)
+vectorize_loops (void)
{
unsigned int i;
unsigned int num_vectorized_loops = 0;
+ unsigned int vect_loops_num;
+ loop_iterator li;
+ struct loop *loop;
/* Fix the verbosity level if not defined explicitly by the user. */
vect_set_dump_settings ();
/* Allocate the bitmap that records which virtual variables
   need to be renamed. */
- vect_vnames_to_rename = BITMAP_ALLOC (NULL);
+ vect_memsyms_to_rename = BITMAP_ALLOC (NULL);
/* ----------- Analyze loops. ----------- */
/* If some loop was duplicated, it gets a bigger number
   than all previously defined loops.  This fact allows us to run
   only over the initial loops, skipping newly generated ones. */
- vect_loops_num = loops->num;
- for (i = 1; i < vect_loops_num; i++)
+ vect_loops_num = number_of_loops ();
+ FOR_EACH_LOOP (li, loop, 0)
{
loop_vec_info loop_vinfo;
- struct loop *loop = loops->parray[i];
-
- if (!loop)
- continue;
vect_loop_location = find_loop_location (loop);
loop_vinfo = vect_analyze_loop (loop);
if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
continue;
- vect_transform_loop (loop_vinfo, loops);
+ vect_transform_loop (loop_vinfo);
num_vectorized_loops++;
}
vect_loop_location = UNKNOWN_LOC;
/* ----------- Finalize. ----------- */
- BITMAP_FREE (vect_vnames_to_rename);
+ BITMAP_FREE (vect_memsyms_to_rename);
for (i = 1; i < vect_loops_num; i++)
{
- struct loop *loop = loops->parray[i];
loop_vec_info loop_vinfo;
+ loop = get_loop (i);
if (!loop)
continue;
loop_vinfo = loop->aux;
return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
}
+
+/* Increase alignment of global arrays to improve vectorization potential.
+ TODO:
+ - Consider also structs that have an array field.
+ - Use ipa analysis to prune arrays that can't be vectorized?
+ This should involve global alignment analysis and in the future also
+ array padding. */
+
+static unsigned int
+increase_alignment (void)
+{
+ struct varpool_node *vnode;
+
+ /* Increase the alignment of all global arrays for vectorization. */
+ for (vnode = varpool_nodes_queue;
+ vnode;
+ vnode = vnode->next_needed)
+ {
+ tree vectype, decl = vnode->decl;
+ unsigned int alignment;
+
+ if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
+ continue;
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
+ if (!vectype)
+ continue;
+ alignment = TYPE_ALIGN (vectype);
+ if (DECL_ALIGN (decl) >= alignment)
+ continue;
+
+ if (vect_can_force_dr_alignment_p (decl, alignment))
+ {
+ DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
+ DECL_USER_ALIGN (decl) = 1;
+ if (dump_file)
+ {
+ fprintf (dump_file, "Increasing alignment of decl: ");
+ print_generic_expr (dump_file, decl, TDF_SLIM);
+ }
+ }
+ }
+ return 0;
+}
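
A hedged illustration of the pass's effect, assuming a target whose
vector type for float is the 16-byte V4SF:

  /* Illustrative only.  Given a global array

       float data[1024];

     whose element type maps to a 16-byte V4SF vectype, the pass raises
     DECL_ALIGN (data) to 128 bits and sets DECL_USER_ALIGN, as if the
     user had written:

       float data[1024] __attribute__ ((aligned (16)));  */
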
+
+static bool
+gate_increase_alignment (void)
+{
+ return flag_section_anchors && flag_tree_vectorize;
+}
+
+struct tree_opt_pass pass_ipa_increase_alignment =
+{
+ "increase_alignment", /* name */
+ gate_increase_alignment, /* gate */
+ increase_alignment, /* execute */
+ NULL, /* sub */
+ NULL, /* next */
+ 0, /* static_pass_number */
+ 0, /* tv_id */
+ 0, /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ 0, /* todo_flags_finish */
+ 0 /* letter */
+};