+/* Function vect_loop_versioning.
+
+ If the loop has data references that may or may not be aligned or/and
+ has data reference relations whose independence was not proven then
+ two versions of the loop need to be generated, one which is vectorized
+ and one which isn't. A test is then generated to control which of the
+ loops is executed. The test checks for the alignment of all of the
+ data references that may or may not be aligned. An additional
+ sequence of runtime tests is generated for each pairs of DDRs whose
+ independence was not proven. The vectorized version of loop is
+ executed only if both alias and alignment tests are passed.
+
+ The test generated to check which version of loop is executed
+ is modified to also check for profitability as indicated by the
+ cost model initially. */
+
+static void
+vect_loop_versioning (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *nloop;
+ tree cond_expr = NULL_TREE;
+ tree cond_expr_stmt_list = NULL_TREE;
+ basic_block condition_bb;
+ block_stmt_iterator cond_exp_bsi;
+ basic_block merge_bb;
+ basic_block new_exit_bb;
+ edge new_exit_e, e;
+ tree orig_phi, new_phi, arg;
+ unsigned prob = 4 * REG_BR_PROB_BASE / 5;
+ tree gimplify_stmt_list;
+ tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
+ int min_profitable_iters = 0;
+ unsigned int th;
+
+ /* Get profitability threshold for vectorized loop. */
+ min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
+
+ th = conservative_cost_threshold (loop_vinfo,
+ min_profitable_iters);
+
+ cond_expr =
+ build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
+ build_int_cst (TREE_TYPE (scalar_loop_iters), th));
+
+ cond_expr = force_gimple_operand (cond_expr, &cond_expr_stmt_list,
+ false, NULL_TREE);
+
+ if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
+ vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
+ &cond_expr_stmt_list);
+
+ if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+ vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
+ &cond_expr_stmt_list);
+
+ cond_expr =
+ fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
+ cond_expr =
+ force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
+ NULL_TREE);
+ append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
+
+ initialize_original_copy_tables ();
+ nloop = loop_version (loop, cond_expr, &condition_bb,
+ prob, prob, REG_BR_PROB_BASE - prob, true);
+ free_original_copy_tables();
+
+ /* Loop versioning violates an assumption we try to maintain during
+ vectorization - that the loop exit block has a single predecessor.
+ After versioning, the exit block of both loop versions is the same
+ basic block (i.e. it has two predecessors). Just in order to simplify
+ following transformations in the vectorizer, we fix this situation
+ here by adding a new (empty) block on the exit-edge of the loop,
+ with the proper loop-exit phis to maintain loop-closed-form. */
+
+ merge_bb = single_exit (loop)->dest;
+ gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
+ new_exit_bb = split_edge (single_exit (loop));
+ new_exit_e = single_exit (loop);
+ e = EDGE_SUCC (new_exit_bb, 0);
+
+ for (orig_phi = phi_nodes (merge_bb); orig_phi;
+ orig_phi = PHI_CHAIN (orig_phi))
+ {
+ new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
+ new_exit_bb);
+ arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+ add_phi_arg (new_phi, arg, new_exit_e);
+ SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
+ }
+
+ /* End loop-exit-fixes after versioning. */
+
+ update_ssa (TODO_update_ssa);
+ if (cond_expr_stmt_list)
+ {
+ cond_exp_bsi = bsi_last (condition_bb);
+ bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
+ }
+}
+
+/* Remove a group of stores (for SLP or interleaving), free their
+ stmt_vec_info. */
+
+static void
+vect_remove_stores (tree first_stmt)
+{
+ tree next = first_stmt;
+ tree tmp;
+ block_stmt_iterator next_si;
+
+ while (next)
+ {
+ /* Free the attached stmt_vec_info and remove the stmt. */
+ next_si = bsi_for_stmt (next);
+ bsi_remove (&next_si, true);
+ tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ free_stmt_vec_info (next);
+ next = tmp;
+ }
+}
+
+
+/* Vectorize SLP instance tree in postorder. */
+
+static bool
+vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
+{
+ tree stmt;
+ bool strided_store, is_store;
+ block_stmt_iterator si;
+ stmt_vec_info stmt_info;
+
+ if (!node)
+ return false;
+
+ vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
+ vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
+
+ stmt = VEC_index(tree, SLP_TREE_SCALAR_STMTS (node), 0);
+ stmt_info = vinfo_for_stmt (stmt);
+ SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size);
+ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
+ print_generic_expr (vect_dump, stmt, TDF_SLIM);
+ }
+
+ si = bsi_for_stmt (stmt);
+ is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
+ if (is_store)
+ {
+ if (DR_GROUP_FIRST_DR (stmt_info))
+ /* If IS_STORE is TRUE, the vectorization of the
+ interleaving chain was completed - free all the stores in
+ the chain. */
+ vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
+ else
+ /* FORNOW: SLP originates only from strided stores. */
+ gcc_unreachable ();
+
+ return true;
+ }
+
+ /* FORNOW: SLP originates only from strided stores. */
+ return false;
+}
+
+
+static bool
+vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
+{
+ VEC (slp_instance, heap) *slp_instances =
+ LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
+ slp_instance instance;
+ unsigned int vec_stmts_size;
+ unsigned int group_size, i;
+ unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ bool is_store = false;
+
+ for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
+ {
+ group_size = SLP_INSTANCE_GROUP_SIZE (instance);
+ /* For each SLP instance calculate number of vector stmts to be created
+ for the scalar stmts in each node of the SLP tree. Number of vector
+ elements in one vector iteration is the number of scalar elements in
+ one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
+ size. */
+ vec_stmts_size = vectorization_factor * group_size / nunits;
+
+ /* Schedule the tree of INSTANCE. */
+ is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
+ vec_stmts_size);
+
+ if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
+ || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
+ fprintf (vect_dump, "vectorizing stmts using SLP.");
+ }
+
+ return is_store;
+}
+