
2007-01-26 Andrew Haley <aph@redhat.com>
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c
index d1caf5c..846d52b 100644
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -35,6 +35,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
 #include "cfgloop.h"
 #include "expr.h"
 #include "optabs.h"
+#include "params.h"
 #include "recog.h"
 #include "tree-data-ref.h"
 #include "tree-chrec.h"
@@ -106,6 +107,10 @@ vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
   else
     new_vect_var = create_tmp_var (type, prefix);
 
+  /* Mark vector typed variable as a gimple register variable.  */
+  if (TREE_CODE (type) == VECTOR_TYPE)
+    DECL_GIMPLE_REG_P (new_vect_var) = true;
+
   return new_vect_var;
 }
 
@@ -301,7 +306,7 @@ vect_create_data_ref_ptr (tree stmt,
   if (!MTAG_P (tag))
     new_type_alias (vect_ptr, tag, DR_REF (dr));
   else
-    var_ann (vect_ptr)->symbol_mem_tag = tag;
+    set_symbol_mem_tag (vect_ptr, tag);
 
   var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
 
@@ -660,7 +665,7 @@ vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
    vector stmt (each computing a vector of 'nunits' results, and together
    computing 'VF' results in each iteration).  This function is called when 
    vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
-   which VF=16 and nuniti=4, so the number of copies required is 4):
+   which VF=16 and nunits=4, so the number of copies required is 4):
 
    scalar stmt:         vectorized into:        STMT_VINFO_RELATED_STMT
  
@@ -1660,7 +1665,7 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
 
       return false;
     }
-  gcc_assert (!stmt_references_memory_p (stmt));
+  gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
 
   for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
     {
@@ -2166,6 +2171,10 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
                                                                                 
   ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
   gcc_assert (ncopies >= 1);
+
+  if (! INTEGRAL_TYPE_P (scalar_type)
+      || !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+    return false;
                                                                                 
   /* Check the operands of the operation.  */
   if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
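The new guard restricts vectorizable_type_demotion to integral source and destination types. A minimal standalone loop of the kind this path targets (illustrative only, not taken from the GCC testsuite) narrows int to short; with a non-integral type on either side the function now returns false before checking the operands:

#include <stdio.h>

#define N 16

int
main (void)
{
  int a[N];
  short b[N];
  int i;

  for (i = 0; i < N; i++)
    a[i] = i * 1000;

  /* int -> short narrowing: the kind of statement
     vectorizable_type_demotion handles.  */
  for (i = 0; i < N; i++)
    b[i] = (short) a[i];

  printf ("%d\n", (int) b[N - 1]);
  return 0;
}
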
@@ -2372,6 +2381,10 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
   if (nunits_out != nunits_in / 2) /* FORNOW */
     return false;
 
+  if (! INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+      || !INTEGRAL_TYPE_P (TREE_TYPE (op0))) 
+    return false;
+
   /* Check the operands of the operation.  */
   if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
     {
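Symmetrically, vectorizable_type_promotion now requires integral types on both sides. An illustrative widening loop (again not from the testsuite):

#include <stdio.h>

#define N 16

int
main (void)
{
  short a[N];
  int b[N];
  int i;

  for (i = 0; i < N; i++)
    a[i] = (short) (i - 8);

  /* short -> int widening: the pattern vectorizable_type_promotion
     handles; the wider result type is why nunits_out must be half of
     nunits_in (the FORNOW check above).  */
  for (i = 0; i < N; i++)
    b[i] = a[i];

  printf ("%d\n", b[0]);
  return 0;
}
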
@@ -2592,23 +2605,43 @@ vect_permute_store_chain (VEC(tree,heap) *dr_chain,
          vect1 = VEC_index (tree, dr_chain, j);
          vect2 = VEC_index (tree, dr_chain, j+length/2);
 
-         /* high = interleave_high (vect1, vect2);  */
+         /* Create interleaving stmt:
+            in the case of big endian: 
+                                high = interleave_high (vect1, vect2) 
+             and in the case of little endian: 
+                                high = interleave_low (vect1, vect2).  */
          perm_dest = create_tmp_var (vectype, "vect_inter_high");
+         DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
-         perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
-                             build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, 
-                                     vect2));
+          if (BYTES_BIG_ENDIAN)
+           perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+                               build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, 
+                                        vect1, vect2)); 
+         else
+            perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+                                build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, 
+                                        vect1, vect2));
          high = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
          VEC_replace (tree, *result_chain, 2*j, high);
 
-         /* low = interleave_low (vect1, vect2);  */
+         /* Create interleaving stmt:
+             in the case of big endian:
+                               low  = interleave_low (vect1, vect2) 
+             and in the case of little endian:
+                               low  = interleave_high (vect1, vect2).  */     
          perm_dest = create_tmp_var (vectype, "vect_inter_low");
+         DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
-         perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
-                             build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, 
-                                     vect2));
+         if (BYTES_BIG_ENDIAN)
+           perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+                               build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, 
+                                       vect1, vect2));
+         else
+            perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+                                build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, 
+                                        vect1, vect2));
          low = make_ssa_name (perm_dest, perm_stmt);
          GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
          vect_finish_stmt_generation (stmt, perm_stmt, bsi);
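The two interleaving statements together must lay the elements of the two chain members out alternately in memory; whether the vector holding the first half comes from VEC_INTERLEAVE_HIGH_EXPR or VEC_INTERLEAVE_LOW_EXPR depends on how the target numbers vector lanes, which is what the BYTES_BIG_ENDIAN tests above select. A rough standalone illustration of the intended data movement (plain C, not GCC internals):

#include <stdio.h>

/* Merge one element from each chain member in turn, the order the two
   interleaving statements together must produce in memory.  */
static void
interleave (const int *a, const int *b, int n, int *out)
{
  int i;

  for (i = 0; i < n; i++)
    {
      out[2 * i] = a[i];        /* element of the first chain member  */
      out[2 * i + 1] = b[i];    /* element of the second chain member  */
    }
}

int
main (void)
{
  int vect1[4] = { 10, 11, 12, 13 };
  int vect2[4] = { 20, 21, 22, 23 };
  int result[8];
  int i;

  interleave (vect1, vect2, 4, result);

  /* Prints 10 20 11 21 12 22 13 23.  */
  for (i = 0; i < 8; i++)
    printf ("%d ", result[i]);
  printf ("\n");
  return 0;
}
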
@@ -2747,7 +2780,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
         S3:  &base + 1 = x1
         S4:  &base + 3 = x3
 
-     We create vectorized storess starting from base address (the access of the
+     We create vectorized stores starting from base address (the access of the
      first stmt in the chain (S2 in the above example), when the last store stmt
      of the chain (S4) is reached:
 
@@ -2786,8 +2819,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
             as an input to vect_permute_store_chain(), and OPRNDS as an input
             to vect_get_vec_def_for_stmt_copy() for the next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
-            OPRNDS are of size 1.
-         */
+            OPRNDS are of size 1.  */
          next_stmt = first_stmt;         
          for (i = 0; i < group_size; i++)
            {
@@ -2795,8 +2827,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
                 is the exact number of stmts in the chain. Therefore, NEXT_STMT
                 can't be NULL_TREE.  In case that there is no interleaving, 
                 GROUP_SIZE is 1, and only one iteration of the loop will be 
-                executed.
-             */
+                executed.  */
              gcc_assert (next_stmt);
              op = GIMPLE_STMT_OPERAND (next_stmt, 1);
              vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
@@ -2816,8 +2847,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
             and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the 
             next copy.
             If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
-            OPRNDS are of size 1.
-         */
+            OPRNDS are of size 1.  */
          for (i = 0; i < group_size; i++)
            {
              vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, 
@@ -2851,16 +2881,16 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
                             vec_oprnd);
          vect_finish_stmt_generation (stmt, new_stmt, bsi);
 
-         /* Set the V_MAY_DEFS for the vector pointer. If this virtual def has a 
-            use outside the loop and a loop peel is performed then the def may be 
-            renamed by the peel.  Mark it for renaming so the later use will also 
-            be renamed.  */
+         /* Set the VDEFs for the vector pointer. If this virtual def
+            has a use outside the loop and a loop peel is performed
+            then the def may be renamed by the peel.  Mark it for
+            renaming so the later use will also be renamed.  */
          copy_virtual_operands (new_stmt, next_stmt);
          if (j == 0)
            {
-             /* The original store is deleted so the same SSA_NAMEs can be used.  
-              */
-             FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VMAYDEF)
+             /* The original store is deleted so the same SSA_NAMEs
+                can be used.  */
+             FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
                {
                  SSA_NAME_DEF_STMT (def) = new_stmt;
                  mark_sym_for_renaming (SSA_NAME_VAR (def));
@@ -2872,7 +2902,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
            {
              /* Create new names for all the definitions created by COPY and
                 add replacement mappings for each new name.  */
-             FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VMAYDEF)
+             FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
                {
                  create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
                  mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
@@ -2882,7 +2912,7 @@ vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
            }
 
          prev_stmt_info = vinfo_for_stmt (new_stmt);
-                 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
+         next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
          if (!next_stmt)
            break;
          /* Bump the vector pointer.  */
@@ -3153,6 +3183,7 @@ vect_permute_load_chain (VEC(tree,heap) *dr_chain,
 
          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
+         DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
         
          perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
@@ -3168,6 +3199,7 @@ vect_permute_load_chain (VEC(tree,heap) *dr_chain,
              
          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
+         DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);
 
          perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
@@ -4037,9 +4069,9 @@ vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
    LOOP - the loop whose preheader will contain STMT.
 
    It's possible to vectorize a loop even though an SSA_NAME from a VUSE
-   appears to be defined in a V_MAY_DEF in another statement in a loop.
+   appears to be defined in a VDEF in another statement in a loop.
    One such case is when the VUSE is at the dereference of a __restricted__
-   pointer in a load and the V_MAY_DEF is at the dereference of a different
+   pointer in a load and the VDEF is at the dereference of a different
    __restricted__ pointer in a store.  Vectorization may result in
    copy_virtual_uses being called to copy the problematic VUSE to a new
    statement that is being inserted in the loop preheader.  This procedure
@@ -4250,6 +4282,7 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
   edge update_e;
   basic_block preheader;
   int loop_num;
+  unsigned int th;
 
   if (vect_print_dump_info (REPORT_DETAILS))
     fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
@@ -4265,8 +4298,11 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
                                   &ratio_mult_vf_name, ratio);
 
   loop_num  = loop->num; 
+  /* Threshold for vectorized loop.  */
+  th = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) * 
+                       LOOP_VINFO_VECT_FACTOR (loop_vinfo);
   new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
-                                           ratio_mult_vf_name, ni_name, false);
+                                           ratio_mult_vf_name, ni_name, false, th);
   gcc_assert (new_loop);
   gcc_assert (loop_num == loop->num);
 #ifdef ENABLE_CHECKING
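The new threshold TH is the --param min-vect-loop-bound value scaled by the vectorization factor; slpeel_tree_peel_loop_to_edge uses it so the vectorized loop is only entered when enough iterations remain. A worked example with assumed values (the comparison below is illustrative; the actual guard is generated by the peeling code):

#include <stdio.h>

int
main (void)
{
  unsigned int min_vect_loop_bound = 2;  /* assumed --param min-vect-loop-bound  */
  unsigned int vf = 4;                   /* assumed vectorization factor  */
  unsigned int th = min_vect_loop_bound * vf;
  unsigned int niters = 6;               /* assumed runtime trip count  */

  if (niters > th)
    printf ("enter vectorized loop, epilogue handles the remainder\n");
  else
    printf ("only %u iterations (threshold %u): run the scalar loop\n",
            niters, th);
  return 0;
}
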
@@ -4491,7 +4527,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
   /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
   new_loop = 
        slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop), 
-                                      niters_of_prolog_loop, ni_name, true); 
+                                      niters_of_prolog_loop, ni_name, true, 0); 
   gcc_assert (new_loop);
 #ifdef ENABLE_CHECKING
   slpeel_verify_cfg_after_peeling (new_loop, loop);
@@ -4651,8 +4687,6 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   int i;
   tree ratio = NULL;
   int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
-  bitmap_iterator bi;
-  unsigned int j;
   bool strided_store;
 
   if (vect_print_dump_info (REPORT_DETAILS))
@@ -4675,11 +4709,13 @@ vect_transform_loop (loop_vec_info loop_vinfo)
       basic_block new_exit_bb;
       edge new_exit_e, e;
       tree orig_phi, new_phi, arg;
+      unsigned prob = 4 * REG_BR_PROB_BASE / 5;
 
       cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
                                                      &cond_expr_stmt_list);
       initialize_original_copy_tables ();
-      nloop = loop_version (loop, cond_expr, &condition_bb, true);
+      nloop = loop_version (loop, cond_expr, &condition_bb,
+                           prob, prob, REG_BR_PROB_BASE - prob, true);
       free_original_copy_tables();
 
       /** Loop versioning violates an assumption we try to maintain during 
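loop_version now takes explicit scaling probabilities; with REG_BR_PROB_BASE being 10000 in GCC, 4 * REG_BR_PROB_BASE / 5 predicts the alignment check to pass, and the vectorized copy to run, 80% of the time. The arithmetic, as a standalone check:

#include <stdio.h>

#define REG_BR_PROB_BASE 10000

int
main (void)
{
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;

  printf ("vectorized copy: %u/%d (%u%%)\n",
          prob, REG_BR_PROB_BASE, 100 * prob / REG_BR_PROB_BASE);
  printf ("scalar copy:     %u/%d\n",
          REG_BR_PROB_BASE - prob, REG_BR_PROB_BASE);
  return 0;
}
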
@@ -4715,7 +4751,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 
   /* CHECKME: we wouldn't need this if we called update_ssa once
      for all loops.  */
-  bitmap_zero (vect_vnames_to_rename);
+  bitmap_zero (vect_memsyms_to_rename);
 
   /* Peel the loop if there are data refs with unknown alignment.
      Only one data ref with unknown store is allowed.  */
@@ -4837,8 +4873,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 
   slpeel_make_loop_iterate_ntimes (loop, ratio);
 
-  EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
-    mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
+  mark_set_for_renaming (vect_memsyms_to_rename);
 
   /* The memory tags and pointers in vectorized statements need to
      have their SSA forms updated.  FIXME, why can't this be delayed