* config/fixed-bit.c, config/i386/cpuid.h, config/i386/i386.c,

[pf3gnuchains/gcc-fork.git] / gcc / tree-vect-transform.c
diff --git a/gcc/tree-vect-transform.c b/gcc/tree-vect-transform.c

index e2ee92b..c718e07 100644 (file)
--- a/gcc/tree-vect-transform.c
+++ b/gcc/tree-vect-transform.c
@@ -137,15 +137,32 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
        return 0;
      }
  
-  /* Requires loop versioning tests to handle misalignment.
-     FIXME: Make cost depend on number of stmts in may_misalign list.  */
+  /* Requires loop versioning tests to handle misalignment.  */
  
    if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
      {
-      vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
+      /*  FIXME: Make cost depend on complexity of individual check.  */
+      vec_outside_cost +=
+        VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
+      if (vect_print_dump_info (REPORT_DETAILS))
+        fprintf (vect_dump, "cost model: Adding cost of checks for loop "
+                 "versioning to treat misalignment.\n");
+    }
+
+  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+    {
+      /*  FIXME: Make cost depend on complexity of individual check.  */
+      vec_outside_cost +=
+        VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "cost model: Adding cost of checks for loop "
-                 "versioning.\n");
+                 "versioning aliasing.\n");
+    }
+
+  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+    {
+      vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
      }
  
    /* Count statements in scalar loop.  Using this as scalar cost for a single
@@ -1364,7 +1381,7 @@ vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
  }
  
  
-/* Get vectorized defintions from SLP_NODE that contains corresponding
+/* Get vectorized definitions from SLP_NODE that contains corresponding
     vectorized def-stmts.  */
   
  static void
@@ -1392,7 +1409,9 @@ vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
     call vect_get_constant_vectors() to create vector stmts.
     Otherwise, the def-stmts must be already vectorized and the vectorized stmts
     must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
-   vect_get_slp_vect_defs() to retrieve them.  */
+   vect_get_slp_vect_defs() to retrieve them.  
+   If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
+   the right node. This is used when the second operand must remain scalar.  */
   
  static void
  vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
@@ -1420,7 +1439,7 @@ vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
      return;
  
    operation = GIMPLE_STMT_OPERAND (first_stmt, 1);
-  if (TREE_OPERAND_LENGTH (operation) == unary_op)
+  if (TREE_OPERAND_LENGTH (operation) == unary_op || !vec_oprnds1)
      return;
  
    *vec_oprnds1 = VEC_alloc (tree, heap, 
@@ -1936,7 +1955,7 @@ vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
    vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
    VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
  
-  if (vec_oprnds1)
+  if (vec_oprnds1 && *vec_oprnds1)
      {
        vec_oprnd = VEC_pop (tree, *vec_oprnds1);
        vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
@@ -2997,10 +3016,14 @@ vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
      return false;
  
    vectype_in = get_vectype_for_scalar_type (rhs_type);
+  if (!vectype_in)
+    return false;
    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  
    lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
    vectype_out = get_vectype_for_scalar_type (lhs_type);
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  
    /* FORNOW */
@@ -3307,11 +3330,15 @@ vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
    op0 = TREE_OPERAND (operation, 0);
    rhs_type = TREE_TYPE (op0);
    vectype_in = get_vectype_for_scalar_type (rhs_type);
+  if (!vectype_in)
+    return false;
    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    lhs_type = TREE_TYPE (scalar_dest);
    vectype_out = get_vectype_for_scalar_type (lhs_type);
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
  
    /* FORNOW */
@@ -3781,6 +3808,8 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
  
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    if (nunits_out != nunits_in)
      return false;
@@ -3891,11 +3920,9 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
    vec_dest = vect_create_destination_var (scalar_dest, vectype);
  
    if (!slp_node)
-    {
-      vec_oprnds0 = VEC_alloc (tree, heap, 1);
-      if (op_type == binary_op)
-       vec_oprnds1 = VEC_alloc (tree, heap, 1);
-    }
+    vec_oprnds0 = VEC_alloc (tree, heap, 1);
+  if (op_type == binary_op)
+    vec_oprnds1 = VEC_alloc (tree, heap, 1);
  
    /* In case the vectorization factor (VF) is bigger than the number
       of elements that we can fit in a vectype (nunits), we have to generate
@@ -3957,8 +3984,7 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
        if (j == 0)
         {
           if (op_type == binary_op
-             && (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
-              && !slp_node)
+             && (code == LSHIFT_EXPR || code == RSHIFT_EXPR))
             {
               /* Vector shl and shr insn patterns can be defined with scalar 
                  operand 2 (shift operand). In this case, use constant or loop 
@@ -3974,11 +4000,14 @@ vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
                 }
             }
          
+          /* vec_oprnd is available if operand 1 should be of a scalar-type 
+             (a special case for certain kind of vector shifts); otherwise, 
+             operand 1 should be of a vector type (the usual case).  */
           if (op_type == binary_op && !vec_oprnd1)
             vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1, 
                                slp_node);
           else
-           vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, &vec_oprnds1, 
+           vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL, 
                                slp_node);
         }
        else
@@ -4081,10 +4110,14 @@ vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
  
    op0 = TREE_OPERAND (operation, 0);
    vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
+  if (!vectype_in)
+    return false;
    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    if (nunits_in != nunits_out / 2) /* FORNOW */
      return false;
@@ -4239,10 +4272,14 @@ vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
  
    op0 = TREE_OPERAND (operation, 0);
    vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
+  if (!vectype_in)
+    return false;
    nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
  
    scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
    vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+  if (!vectype_out)
+    return false;
    nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
    if (nunits_out != nunits_in / 2) /* FORNOW */
      return false;
@@ -6850,31 +6887,18 @@ vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
  static tree
  vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
  {
-  tree segment_length;
+  tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
+                                    DR_STEP (dr), vect_factor);
  
    if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
      {
-      tree vector_size =
-        build_int_cst (integer_type_node,
-          GET_MODE_SIZE (TYPE_MODE (STMT_VINFO_VECTYPE
-           (vinfo_for_stmt (DR_STMT (dr))))));
+      tree vector_size = TYPE_SIZE_UNIT
+                         (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
  
-      segment_length =
-       fold_convert (sizetype,
-         fold_build2 (PLUS_EXPR, integer_type_node,
-           fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
-                        vect_factor),
-           vector_size));
+      segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
+                                   segment_length, vector_size);
      }
-  else
-    {
-      segment_length =
-       fold_convert (sizetype,
-         fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
-                      vect_factor));
-    }
-
-    return segment_length;
+  return fold_convert (sizetype, segment_length);
  }
  
  /* Function vect_create_cond_for_alias_checks.
@@ -6893,6 +6917,8 @@ vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
     COND_EXPR - conditional expression.
     COND_EXPR_STMT_LIST - statements needed to construct the conditional
                           expression.
+
+
     The returned value is the conditional expression to be used in the if
     statement that controls which version of the loop gets executed at runtime.
  */
@@ -6926,26 +6952,47 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
  
    for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
      {
-      tree stmt_a = DR_STMT (DDR_A (ddr));
-      tree stmt_b = DR_STMT (DDR_B (ddr));
+      struct data_reference *dr_a, *dr_b;
+      tree dr_group_first_a, dr_group_first_b;
+      tree addr_base_a, addr_base_b;
+      tree segment_length_a, segment_length_b;
+      tree stmt_a, stmt_b;
  
-      tree addr_base_a =
+      dr_a = DDR_A (ddr);
+      stmt_a = DR_STMT (DDR_A (ddr));
+      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
+      if (dr_group_first_a)
+        {
+         stmt_a = dr_group_first_a;
+         dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
+       }
+
+      dr_b = DDR_B (ddr);
+      stmt_b = DR_STMT (DDR_B (ddr));
+      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
+      if (dr_group_first_b)
+        {
+         stmt_b = dr_group_first_b;
+         dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
+       }
+
+      addr_base_a =
          vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
                                               NULL_TREE, loop);
-      tree addr_base_b =
+      addr_base_b =
          vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
                                               NULL_TREE, loop);
  
-      tree segment_length_a = vect_vfa_segment_size (DDR_A (ddr), vect_factor);
-      tree segment_length_b = vect_vfa_segment_size (DDR_B (ddr), vect_factor);
+      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
+      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
  
        if (vect_print_dump_info (REPORT_DR_DETAILS))
         {
           fprintf (vect_dump,
                    "create runtime check for data references ");
-         print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
+         print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
           fprintf (vect_dump, " and ");
-         print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
+         print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
         }
  
  
@@ -6974,6 +7021,91 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
  
  }
  
+/* Function vect_loop_versioning.
+ 
+   If the loop has data references that may or may not be aligned or/and
+   has data reference relations whose independence was not proven then
+   two versions of the loop need to be generated, one which is vectorized
+   and one which isn't.  A test is then generated to control which of the
+   loops is executed.  The test checks for the alignment of all of the
+   data references that may or may not be aligned.  An additional
+   sequence of runtime tests is generated for each pairs of DDRs whose
+   independence was not proven.  The vectorized version of loop is 
+   executed only if both alias and alignment tests are passed.  */
+
+static void
+vect_loop_versioning (loop_vec_info loop_vinfo)
+{
+  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+  struct loop *nloop;
+  tree cond_expr = NULL_TREE;
+  tree cond_expr_stmt_list = NULL_TREE;
+  basic_block condition_bb;
+  block_stmt_iterator cond_exp_bsi;
+  basic_block merge_bb;
+  basic_block new_exit_bb;
+  edge new_exit_e, e;
+  tree orig_phi, new_phi, arg;
+  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
+  tree gimplify_stmt_list;
+
+  if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+      && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+    return;
+
+  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
+    cond_expr =
+      vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
+
+  if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+    vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr, &cond_expr_stmt_list);
+
+  cond_expr =
+    fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
+  cond_expr =
+    force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
+                         NULL_TREE);
+  append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
+
+  initialize_original_copy_tables ();
+  nloop = loop_version (loop, cond_expr, &condition_bb,
+                       prob, prob, REG_BR_PROB_BASE - prob, true);
+  free_original_copy_tables();
+
+  /* Loop versioning violates an assumption we try to maintain during 
+     vectorization - that the loop exit block has a single predecessor.
+     After versioning, the exit block of both loop versions is the same
+     basic block (i.e. it has two predecessors). Just in order to simplify
+     following transformations in the vectorizer, we fix this situation
+     here by adding a new (empty) block on the exit-edge of the loop,
+     with the proper loop-exit phis to maintain loop-closed-form.  */
+  
+  merge_bb = single_exit (loop)->dest;
+  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
+  new_exit_bb = split_edge (single_exit (loop));
+  new_exit_e = single_exit (loop);
+  e = EDGE_SUCC (new_exit_bb, 0);
+
+  for (orig_phi = phi_nodes (merge_bb); orig_phi; 
+       orig_phi = PHI_CHAIN (orig_phi))
+    {
+      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
+                                 new_exit_bb);
+      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+      add_phi_arg (new_phi, arg, new_exit_e);
+      SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
+    } 
+
+  /* End loop-exit-fixes after versioning.  */
+
+  update_ssa (TODO_update_ssa);
+  if (cond_expr_stmt_list)
+    {
+      cond_exp_bsi = bsi_last (condition_bb);
+      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
+    }
+}
+
  /* Remove a group of stores (for SLP or interleaving), free their 
     stmt_vec_info.  */
  
@@ -7082,7 +7214,6 @@ vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
    return is_store;
  }
  
-
  /* Function vect_transform_loop.
  
     The analysis phase has determined that the loop is vectorizable.
@@ -7105,82 +7236,7 @@ vect_transform_loop (loop_vec_info loop_vinfo)
  
    if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "=== vec_transform_loop ===");
-
-  /* If the loop has data references that may or may not be aligned or/and
-     has data reference relations whose independence was not proven then
-     two versions of the loop need to be generated, one which is vectorized
-     and one which isn't.  A test is then generated to control which of the
-     loops is executed.  The test checks for the alignment of all of the
-     data references that may or may not be aligned.  An additional
-     sequence of runtime tests is generated for each pairs of DDRs whose
-     independence was not proven.  The vectorized version of loop is 
-     executed only if both alias and alignment tests are passed.  */
-
-  if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
-      || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
-    {
-      struct loop *nloop;
-      tree cond_expr = NULL_TREE;
-      tree cond_expr_stmt_list = NULL_TREE;
-      basic_block condition_bb;
-      block_stmt_iterator cond_exp_bsi;
-      basic_block merge_bb;
-      basic_block new_exit_bb;
-      edge new_exit_e, e;
-      tree orig_phi, new_phi, arg;
-      unsigned prob = 4 * REG_BR_PROB_BASE / 5;
-      tree gimplify_stmt_list;
-
-      if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
-       cond_expr =
-         vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
-
-      if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
-       vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
-                                                     &cond_expr_stmt_list);
-
-      cond_expr =
-        fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
-      cond_expr =
-        force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
-                              NULL_TREE);
-      append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
-
-      initialize_original_copy_tables ();
-      nloop = loop_version (loop, cond_expr, &condition_bb,
-                           prob, prob, REG_BR_PROB_BASE - prob, true);
-      free_original_copy_tables();
-
-      /** Loop versioning violates an assumption we try to maintain during 
-        vectorization - that the loop exit block has a single predecessor.
-        After versioning, the exit block of both loop versions is the same
-        basic block (i.e. it has two predecessors). Just in order to simplify
-        following transformations in the vectorizer, we fix this situation
-        here by adding a new (empty) block on the exit-edge of the loop,
-        with the proper loop-exit phis to maintain loop-closed-form.  **/
-      
-      merge_bb = single_exit (loop)->dest;
-      gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
-      new_exit_bb = split_edge (single_exit (loop));
-      new_exit_e = single_exit (loop);
-      e = EDGE_SUCC (new_exit_bb, 0);
-
-      for (orig_phi = phi_nodes (merge_bb); orig_phi; 
-          orig_phi = PHI_CHAIN (orig_phi))
-       {
-          new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
-                                    new_exit_bb);
-          arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
-          add_phi_arg (new_phi, arg, new_exit_e);
-         SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
-       } 
-
-      /** end loop-exit-fixes after versioning  **/
-
-      update_ssa (TODO_update_ssa);
-      cond_exp_bsi = bsi_last (condition_bb);
-      bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
-    }
+  vect_loop_versioning (loop_vinfo);
  
    /* CHECKME: we wouldn't need this if we called update_ssa once
       for all loops.  */