return 0;
}
- /* Requires loop versioning tests to handle misalignment.
- FIXME: Make cost depend on number of stmts in may_misalign list. */
+ /* Requires loop versioning tests to handle misalignment. */
if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
{
- vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
+ /* FIXME: Make cost depend on complexity of individual check. */
+ vec_outside_cost +=
+ VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: Adding cost of checks for loop "
+ "versioning to treat misalignment.\n");
+ }
+
+ if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+ {
+ /* FIXME: Make cost depend on complexity of individual check. */
+ vec_outside_cost +=
+ VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "cost model: Adding cost of checks for loop "
- "versioning.\n");
+ "versioning aliasing.\n");
+ }
+
+ if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+ || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+ {
+ vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
}
/* Count statements in scalar loop. Using this as scalar cost for a single
}
-/* Get vectorized defintions from SLP_NODE that contains corresponding
+/* Get vectorized definitions from SLP_NODE that contains corresponding
vectorized def-stmts. */
static void
call vect_get_constant_vectors() to create vector stmts.
Otherwise, the def-stmts must be already vectorized and the vectorized stmts
must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
- vect_get_slp_vect_defs() to retrieve them. */
+ vect_get_slp_vect_defs() to retrieve them.
+ If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
+ the right node). This is used when the second operand must remain scalar. */
static void
vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
return;
operation = GIMPLE_STMT_OPERAND (first_stmt, 1);
- if (TREE_OPERAND_LENGTH (operation) == unary_op)
+ if (TREE_OPERAND_LENGTH (operation) == unary_op || !vec_oprnds1)
return;
*vec_oprnds1 = VEC_alloc (tree, heap,
vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
- if (vec_oprnds1)
+ if (vec_oprnds1 && *vec_oprnds1)
{
vec_oprnd = VEC_pop (tree, *vec_oprnds1);
vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
return false;
vectype_in = get_vectype_for_scalar_type (rhs_type);
+ if (!vectype_in)
+ return false;
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
vectype_out = get_vectype_for_scalar_type (lhs_type);
+ if (!vectype_out)
+ return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* FORNOW */
op0 = TREE_OPERAND (operation, 0);
rhs_type = TREE_TYPE (op0);
vectype_in = get_vectype_for_scalar_type (rhs_type);
+ if (!vectype_in)
+ return false;
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
lhs_type = TREE_TYPE (scalar_dest);
vectype_out = get_vectype_for_scalar_type (lhs_type);
+ if (!vectype_out)
+ return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* FORNOW */
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+ if (!vectype_out)
+ return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
if (nunits_out != nunits_in)
return false;
vec_dest = vect_create_destination_var (scalar_dest, vectype);
if (!slp_node)
- {
- vec_oprnds0 = VEC_alloc (tree, heap, 1);
- if (op_type == binary_op)
- vec_oprnds1 = VEC_alloc (tree, heap, 1);
- }
+ vec_oprnds0 = VEC_alloc (tree, heap, 1);
+ if (op_type == binary_op)
+ vec_oprnds1 = VEC_alloc (tree, heap, 1);
/* In case the vectorization factor (VF) is bigger than the number
of elements that we can fit in a vectype (nunits), we have to generate
if (j == 0)
{
if (op_type == binary_op
- && (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
- && !slp_node)
+ && (code == LSHIFT_EXPR || code == RSHIFT_EXPR))
{
/* Vector shl and shr insn patterns can be defined with scalar
operand 2 (shift operand). In this case, use constant or loop
}
}
+ /* vec_oprnd1 is available if operand 1 should be of a scalar-type
+ (a special case for certain kinds of vector shifts); otherwise,
+ operand 1 should be of a vector type (the usual case). */
if (op_type == binary_op && !vec_oprnd1)
vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
slp_node);
else
- vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, &vec_oprnds1,
+ vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
slp_node);
}
else
op0 = TREE_OPERAND (operation, 0);
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
+ if (!vectype_in)
+ return false;
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+ if (!vectype_out)
+ return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
if (nunits_in != nunits_out / 2) /* FORNOW */
return false;
op0 = TREE_OPERAND (operation, 0);
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
+ if (!vectype_in)
+ return false;
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
+ if (!vectype_out)
+ return false;
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
if (nunits_out != nunits_in / 2) /* FORNOW */
return false;
static tree
vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
{
- tree segment_length;
+ tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
+ DR_STEP (dr), vect_factor); /* Base length: DR_STEP (dr) * VECT_FACTOR. */
if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
{
- tree vector_size =
- build_int_cst (integer_type_node,
- GET_MODE_SIZE (TYPE_MODE (STMT_VINFO_VECTYPE
- (vinfo_for_stmt (DR_STMT (dr))))));
+ tree vector_size = TYPE_SIZE_UNIT
+ (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)))); /* TYPE_SIZE_UNIT of the vectype of DR's stmt. */
- segment_length =
- fold_convert (sizetype,
- fold_build2 (PLUS_EXPR, integer_type_node,
- fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
- vect_factor),
- vector_size));
+ segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
+ segment_length, vector_size); /* In this alignment scheme, add VECTOR_SIZE to the base length. */
}
- else
- {
- segment_length =
- fold_convert (sizetype,
- fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
- vect_factor));
- }
-
- return segment_length;
+ return fold_convert (sizetype, segment_length); /* Both paths return the length converted to sizetype. */
}
/* Function vect_create_cond_for_alias_checks.
COND_EXPR - conditional expression.
COND_EXPR_STMT_LIST - statements needed to construct the conditional
expression.
+
+
The returned value is the conditional expression to be used in the if
statement that controls which version of the loop gets executed at runtime.
*/
for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
{
- tree stmt_a = DR_STMT (DDR_A (ddr));
- tree stmt_b = DR_STMT (DDR_B (ddr));
+ struct data_reference *dr_a, *dr_b;
+ tree dr_group_first_a, dr_group_first_b;
+ tree addr_base_a, addr_base_b;
+ tree segment_length_a, segment_length_b;
+ tree stmt_a, stmt_b;
- tree addr_base_a =
+ dr_a = DDR_A (ddr);
+ stmt_a = DR_STMT (DDR_A (ddr));
+ dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
+ if (dr_group_first_a)
+ {
+ stmt_a = dr_group_first_a;
+ dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
+ }
+
+ dr_b = DDR_B (ddr);
+ stmt_b = DR_STMT (DDR_B (ddr));
+ dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
+ if (dr_group_first_b)
+ {
+ stmt_b = dr_group_first_b;
+ dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
+ }
+
+ addr_base_a =
vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
NULL_TREE, loop);
- tree addr_base_b =
+ addr_base_b =
vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
NULL_TREE, loop);
- tree segment_length_a = vect_vfa_segment_size (DDR_A (ddr), vect_factor);
- tree segment_length_b = vect_vfa_segment_size (DDR_B (ddr), vect_factor);
+ segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
+ segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
if (vect_print_dump_info (REPORT_DR_DETAILS))
{
fprintf (vect_dump,
"create runtime check for data references ");
- print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
+ print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
fprintf (vect_dump, " and ");
- print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
+ print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
}
}
+/* Function vect_loop_versioning.
+
+ If the loop has data references that may or may not be aligned and/or
+ has data reference relations whose independence was not proven, then
+ two versions of the loop need to be generated, one which is vectorized
+ and one which isn't. A test is then generated to control which of the
+ loops is executed. The test checks for the alignment of all of the
+ data references that may or may not be aligned. An additional
+ sequence of runtime tests is generated for each pair of DDRs whose
+ independence was not proven. The vectorized version of the loop is
+ executed only if both the alias and the alignment tests pass.
+
+ LOOP_VINFO describes the loop to be versioned. The function returns
+ without doing anything if both LOOP_VINFO_MAY_MISALIGN_STMTS and
+ LOOP_VINFO_MAY_ALIAS_DDRS are empty. Otherwise the combined condition
+ is compared against zero, gimplified, and handed to loop_version
+ together with PROB (4/5 of REG_BR_PROB_BASE). The statements that
+ compute the condition, if any, are inserted into the condition block
+ in front of the statement that bsi_last returns for it. */
+
+static void
+vect_loop_versioning (loop_vec_info loop_vinfo)
+{
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ struct loop *nloop;
+ tree cond_expr = NULL_TREE;
+ tree cond_expr_stmt_list = NULL_TREE;
+ basic_block condition_bb;
+ block_stmt_iterator cond_exp_bsi;
+ basic_block merge_bb;
+ basic_block new_exit_bb;
+ edge new_exit_e, e;
+ tree orig_phi, new_phi, arg;
+ unsigned prob = 4 * REG_BR_PROB_BASE / 5;
+ tree gimplify_stmt_list;
+
+ if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+ && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+ return;
+
+ if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
+ cond_expr =
+ vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
+
+ if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
+ vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr, &cond_expr_stmt_list);
+
+ cond_expr =
+ fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
+ cond_expr =
+ force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
+ NULL_TREE);
+ append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
+
+ initialize_original_copy_tables ();
+ nloop = loop_version (loop, cond_expr, &condition_bb,
+ prob, prob, REG_BR_PROB_BASE - prob, true);
+ free_original_copy_tables();
+
+ /* Loop versioning violates an assumption we try to maintain during
+ vectorization - that the loop exit block has a single predecessor.
+ After versioning, the exit block of both loop versions is the same
+ basic block (i.e. it has two predecessors). To simplify the
+ following transformations in the vectorizer, we fix this situation
+ here by adding a new (empty) block on the exit-edge of the loop,
+ with the proper loop-exit phis to maintain loop-closed-form. */
+
+ merge_bb = single_exit (loop)->dest;
+ gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
+ new_exit_bb = split_edge (single_exit (loop));
+ new_exit_e = single_exit (loop);
+ e = EDGE_SUCC (new_exit_bb, 0);
+
+ for (orig_phi = phi_nodes (merge_bb); orig_phi;
+ orig_phi = PHI_CHAIN (orig_phi))
+ {
+ new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
+ new_exit_bb);
+ arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
+ add_phi_arg (new_phi, arg, new_exit_e);
+ SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
+ }
+
+ /* End loop-exit-fixes after versioning. */
+
+ update_ssa (TODO_update_ssa);
+ if (cond_expr_stmt_list)
+ {
+ cond_exp_bsi = bsi_last (condition_bb);
+ bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
+ }
+}
+
/* Remove a group of stores (for SLP or interleaving), free their
stmt_vec_info. */
return is_store;
}
-
/* Function vect_transform_loop.
The analysis phase has determined that the loop is vectorizable.
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vec_transform_loop ===");
-
- /* If the loop has data references that may or may not be aligned or/and
- has data reference relations whose independence was not proven then
- two versions of the loop need to be generated, one which is vectorized
- and one which isn't. A test is then generated to control which of the
- loops is executed. The test checks for the alignment of all of the
- data references that may or may not be aligned. An additional
- sequence of runtime tests is generated for each pairs of DDRs whose
- independence was not proven. The vectorized version of loop is
- executed only if both alias and alignment tests are passed. */
-
- if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
- || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
- {
- struct loop *nloop;
- tree cond_expr = NULL_TREE;
- tree cond_expr_stmt_list = NULL_TREE;
- basic_block condition_bb;
- block_stmt_iterator cond_exp_bsi;
- basic_block merge_bb;
- basic_block new_exit_bb;
- edge new_exit_e, e;
- tree orig_phi, new_phi, arg;
- unsigned prob = 4 * REG_BR_PROB_BASE / 5;
- tree gimplify_stmt_list;
-
- if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
- cond_expr =
- vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
-
- if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
- vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
- &cond_expr_stmt_list);
-
- cond_expr =
- fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
- cond_expr =
- force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
- NULL_TREE);
- append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
-
- initialize_original_copy_tables ();
- nloop = loop_version (loop, cond_expr, &condition_bb,
- prob, prob, REG_BR_PROB_BASE - prob, true);
- free_original_copy_tables();
-
- /** Loop versioning violates an assumption we try to maintain during
- vectorization - that the loop exit block has a single predecessor.
- After versioning, the exit block of both loop versions is the same
- basic block (i.e. it has two predecessors). Just in order to simplify
- following transformations in the vectorizer, we fix this situation
- here by adding a new (empty) block on the exit-edge of the loop,
- with the proper loop-exit phis to maintain loop-closed-form. **/
-
- merge_bb = single_exit (loop)->dest;
- gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
- new_exit_bb = split_edge (single_exit (loop));
- new_exit_e = single_exit (loop);
- e = EDGE_SUCC (new_exit_bb, 0);
-
- for (orig_phi = phi_nodes (merge_bb); orig_phi;
- orig_phi = PHI_CHAIN (orig_phi))
- {
- new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
- new_exit_bb);
- arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
- add_phi_arg (new_phi, arg, new_exit_e);
- SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
- }
-
- /** end loop-exit-fixes after versioning **/
-
- update_ssa (TODO_update_ssa);
- cond_exp_bsi = bsi_last (condition_bb);
- bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
- }
+ vect_loop_versioning (loop_vinfo);
/* CHECKME: we wouldn't need this if we called update_ssa once
for all loops. */