static int vect_min_worthwhile_factor (enum tree_code);
+/* Function vect_estimate_min_profitable_iters
+
+ Return the number of iterations required for the vector version of the
+ loop to be profitable relative to the cost of the scalar version of the
+ loop.
+
+ TODO: Take profile information, when available, into account before
+ making vectorization decisions. */
+
+int
+vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
+{
+ int i;
+ int min_profitable_iters;
+ int peel_iters_prologue;
+ int peel_iters_epilogue;
+ int vec_inside_cost = 0;
+ int vec_outside_cost = 0;
+ int scalar_single_iter_cost = 0;
+ int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+ struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
+ int nbbs = loop->num_nodes;
+
+ /* Cost model disabled. */
+ if (!flag_vect_cost_model)
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model disabled.");
+ return 0;
+ }
+
+ /* Requires loop versioning tests to handle misalignment.
+ FIXME: Make cost depend on number of stmts in may_misalign list. */
+
+ if (LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
+ {
+ vec_outside_cost += TARG_COND_BRANCH_COST;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: Adding cost of checks for loop "
+ "versioning.\n");
+ }
+
+ /* Requires a prologue loop when peeling to handle misalignment. Add cost of
+ two guards, one for the peeled loop and one for the vector loop. */
+
+ peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+ if (peel_iters_prologue)
+ {
+ vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: Adding cost of checks for "
+ "prologue.\n");
+ }
+
+ /* Requires an epilogue loop to finish up the remaining iterations after the
+ vector loop. Add cost of two guards, one for the peeled loop and one for the
+ vector loop. */
+
+ if ((peel_iters_prologue < 0)
+ || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
+ || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
+ {
+ vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model : Adding cost of checks for "
+ "epilogue.\n");
+ }
+
+ /* Count statements in the scalar loop. For now, this count is used as the
+ scalar cost of a single iteration.
+
+ TODO: Add outer loop support.
+
+ TODO: Consider assigning different costs to different scalar
+ statements. */
+
+ for (i = 0; i < nbbs; i++)
+ {
+ block_stmt_iterator si;
+ basic_block bb = bbs[i];
+
+ for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
+ {
+ tree stmt = bsi_stmt (si);
+ stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
+ if (!STMT_VINFO_RELEVANT_P (stmt_info)
+ && !STMT_VINFO_LIVE_P (stmt_info))
+ continue;
+ scalar_single_iter_cost++;
+ vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
+ vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
+ }
+ }
+
+ /* Add additional cost for the peeled instructions in the prologue and
+ epilogue loops.
+
+ FORNOW: If we don't know the value of peel_iters for the prologue or
+ epilogue at compile time, we assume the worst.
+
+ TODO: Build an expression that represents peel_iters for prologue and
+ epilogue to be used in a run-time test. */
+
+ peel_iters_prologue = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
+
+ if (peel_iters_prologue < 0)
+ {
+ peel_iters_prologue = vf - 1;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: "
+ "prologue peel iters set conservatively.");
+
+ /* If peeling for alignment is unknown, the loop bound of the main loop
+ becomes unknown. */
+ peel_iters_epilogue = vf - 1;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: "
+ "epilogue peel iters set conservatively because "
+ "peeling for alignment is unknown .");
+ }
+ else
+ {
+ if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ peel_iters_epilogue = vf - 1;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: "
+ "epilogue peel iters set conservatively because "
+ "loop iterations are unknown .");
+ }
+ else
+ peel_iters_epilogue =
+ (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_iters_prologue)
+ % vf;
+ }
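+
+ /* For example (illustrative values only): with VF = 4, 3 known prologue
+ peel iterations and NITERS = 17, the epilogue loop above is assigned
+ (17 - 3) % 4 = 2 iterations. */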
+
+ vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
+ + (peel_iters_epilogue * scalar_single_iter_cost);
+
+ /* Calculate number of iterations required to make the vector version
+ profitable, relative to the loop bodies only. The following condition
+ must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
+ SIC = scalar iteration cost, VIC = vector iteration cost,
+ VOC = vector outside cost and VF = vectorization factor. */
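+
+ /* For example (purely illustrative costs): with SIC = 4, VIC = 6, VOC = 10
+ and VF = 4, the condition (4*4 - 6) * niters > 10 * 4 requires niters > 4,
+ so the vector loop must run at least 5 iterations to be worthwhile. */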
+
+ if ((scalar_single_iter_cost * vf) > vec_inside_cost)
+ {
+ if (vec_outside_cost == 0)
+ min_profitable_iters = 1;
+ else
+ {
+ min_profitable_iters = (vec_outside_cost * vf)
+ / ((scalar_single_iter_cost * vf)
+ - vec_inside_cost);
+
+ if ((scalar_single_iter_cost * vf * min_profitable_iters)
+ <= ((vec_inside_cost * min_profitable_iters)
+ + (vec_outside_cost * vf)))
+ min_profitable_iters++;
+ }
+ }
+ /* The vector version will never be profitable. */
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "cost model: vector iteration cost = %d "
+ "is divisible by scalar iteration cost = %d by a factor "
+ "greater than or equal to the vectorization factor = %d .",
+ vec_inside_cost, scalar_single_iter_cost, vf);
+ return -1;
+ }
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ {
+ fprintf (vect_dump, "Cost model analysis: \n");
+ fprintf (vect_dump, " Vector inside of loop cost: %d\n",
+ vec_inside_cost);
+ fprintf (vect_dump, " Vector outside of loop cost: %d\n",
+ vec_outside_cost);
+ fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
+ fprintf (vect_dump, " prologue iterations: %d\n",
+ peel_iters_prologue);
+ fprintf (vect_dump, " epilogue iterations: %d\n",
+ peel_iters_epilogue);
+ fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
+ min_profitable_iters);
+ fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
+ min_profitable_iters < vf ? vf : min_profitable_iters);
+ }
+
+ return min_profitable_iters < vf ? vf : min_profitable_iters;
+}
+
+
+/* TODO: There is a close dependency between the vect_model_*_cost and
+ vectorizable_* functions; redesign to avoid maintenance issues. */
+
+/* Function vect_model_reduction_cost.
+
+ Models cost for a reduction operation, including the vector ops
+ generated within the strip-mine loop, the initial definition before
+ the loop, and the epilogue code that must be generated. */
+
+static void
+vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
+ int ncopies)
+{
+ int outer_cost = 0;
+ enum tree_code code;
+ optab optab;
+ tree vectype;
+ tree orig_stmt;
+ tree reduction_op;
+ enum machine_mode mode;
+ tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
+ int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
+
+ /* Cost of reduction op inside loop. */
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
+
+ reduction_op = TREE_OPERAND (operation, op_type-1);
+ vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
+ mode = TYPE_MODE (vectype);
+ orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
+
+ if (!orig_stmt)
+ orig_stmt = STMT_VINFO_STMT (stmt_info);
+
+ code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
+
+ /* Add in cost for initial definition. */
+ outer_cost += TARG_VEC_STMT_COST;
+
+ /* Determine cost of epilogue code.
+
+ We have a reduction operator that will reduce the vector in one statement.
+ Also requires scalar extract. */
+
+ if (reduc_code < NUM_TREE_CODES)
+ outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
+ else
+ {
+ int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
+ tree bitsize =
+ TYPE_SIZE (TREE_TYPE (GIMPLE_STMT_OPERAND (orig_stmt, 0)));
+ int element_bitsize = tree_low_cst (bitsize, 1);
+ int nelements = vec_size_in_bits / element_bitsize;
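+
+ /* For example (hypothetical target), reducing a 128-bit vector of four
+ 32-bit elements gives nelements = 4: either exact_log2(4)*2 + 1 = 5
+ shift/op statements, or 4 extracts plus 3 reduction ops (7 statements),
+ depending on the branch taken below. */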
+
+ optab = optab_for_tree_code (code, vectype);
+
+ /* We have a whole vector shift available. */
+ if (VECTOR_MODE_P (mode)
+ && optab->handlers[mode].insn_code != CODE_FOR_nothing)
+ /* Final reduction via vector shifts and the reduction operator. Also
+ requires scalar extract. */
+ outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST);
+ else
+ /* Use extracts and reduction op for final reduction. For N elements,
+ we have N extracts and N-1 reduction ops. */
+ outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
+ }
+
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_model_induction_cost.
+
+ Models cost for induction operations. */
+
+static void
+vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ /* Loop cost for vec_loop. */
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
+ /* Prologue cost for vec_init and vec_step. */
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_VEC_STMT_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_model_simple_cost.
+
+ Models cost for simple operations, i.e. those that only emit ncopies of a
+ single op. Right now, this does not account for multiple insns that could
+ be generated for the single vector op. We will handle that shortly. */
+
+static void
+vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_cost_strided_group_size
+
+ For strided load or store, return the group_size only if it is the first
+ load or store of a group, else return 1. This ensures that group size is
+ only returned once per group. */
+
+static int
+vect_cost_strided_group_size (stmt_vec_info stmt_info)
+{
+ tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+
+ if (first_stmt == STMT_VINFO_STMT (stmt_info))
+ return DR_GROUP_SIZE (stmt_info);
+
+ return 1;
+}
+
+
+/* Function vect_model_store_cost
+
+ Models cost for stores. In the case of strided accesses, one access
+ has the overhead of the strided access attributed to it. */
+
+static void
+vect_model_store_cost (stmt_vec_info stmt_info, int ncopies)
+{
+ int cost = 0;
+ int group_size;
+
+ /* Strided access? */
+ if (DR_GROUP_FIRST_DR (stmt_info))
+ group_size = vect_cost_strided_group_size (stmt_info);
+ /* Not a strided access. */
+ else
+ group_size = 1;
+
+ /* Is this an access in a group of stores that together provide strided access?
+ If so, add in the cost of the permutes. */
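+ /* For example (illustrative group size), a group of 4 stores needs
+ exact_log2(4) * 4 = 8 interleave statements per vector copy under the
+ high/low interleaving scheme costed below. */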
+ if (group_size > 1)
+ {
+ /* Uses a high and low interleave operation for each needed permute. */
+ cost = ncopies * exact_log2(group_size) * group_size
+ * TARG_VEC_STMT_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
+ group_size);
+
+ }
+
+ /* Costs of the stores. */
+ cost += ncopies * TARG_VEC_STORE_COST;
+
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+}
+
+
+/* Function vect_model_load_cost
+
+ Models cost for loads. In the case of strided accesses, the first access
+ in the group has the overhead of the strided access attributed to it. Since unaligned
+ accesses are supported for loads, we also account for the costs of the
+ access scheme chosen. */
+
+static void
+vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
+
+{
+ int inner_cost = 0;
+ int group_size;
+ int alignment_support_scheme;
+ tree first_stmt;
+ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
+
+ /* Strided accesses? */
+ first_stmt = DR_GROUP_FIRST_DR (stmt_info);
+ if (first_stmt)
+ {
+ group_size = vect_cost_strided_group_size (stmt_info);
+ first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
+ }
+ /* Not a strided access. */
+ else
+ {
+ group_size = 1;
+ first_dr = dr;
+ }
+
+ alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
+
+ /* Is this an access in a group of loads providing strided access?
+ If so, add in the cost of the permutes. */
+ if (group_size > 1)
+ {
+ /* Uses even and odd extract operations for each needed permute. */
+ inner_cost = ncopies * exact_log2(group_size) * group_size
+ * TARG_VEC_STMT_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
+ group_size);
+
+ }
+
+ /* The loads themselves. */
+ switch (alignment_support_scheme)
+ {
+ case dr_aligned:
+ {
+ inner_cost += ncopies * TARG_VEC_LOAD_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: aligned.");
+
+ break;
+ }
+ case dr_unaligned_supported:
+ {
+ /* Here, we assign an additional cost for the unaligned load. */
+ inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
+ "hardware.");
+
+ break;
+ }
+ case dr_unaligned_software_pipeline:
+ {
+ int outer_cost = 0;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: unaligned software "
+ "pipelined.");
+
+ /* Unaligned software pipeline has a load of an address, an initial
+ load, and possibly a mask operation to "prime" the loop. However,
+ if this is an access in a group of loads that together provide
+ strided access, the above cost should only be considered for one
+ access in the group. Inside the loop, there is a load op
+ and a realignment op. */
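+ /* With the mask-for-load builtin, that is three one-time prologue
+ statements; each vector copy in the loop then pays one load plus one
+ realignment statement, as reflected in the costs below. */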
+
+ if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
+ {
+ outer_cost = 2*TARG_VEC_STMT_COST;
+ if (targetm.vectorize.builtin_mask_for_load)
+ outer_cost += TARG_VEC_STMT_COST;
+ }
+
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
+
+ inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
+
+ break;
+ }
+
+ default:
+ gcc_unreachable ();
+ }
+
+ STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
+ "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
+ STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
+
+}
+
+
/* Function vect_get_new_vect_var.
Returns a name for a new variable. The current naming scheme appends the
/* Create base_offset */
base_offset = size_binop (PLUS_EXPR, base_offset, init);
+ base_offset = fold_convert (sizetype, base_offset);
dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
add_referenced_var (dest);
base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
if (offset)
{
- tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
+ tree tmp = create_tmp_var (sizetype, "offset");
tree step;
/* For interleaved access step we divide STEP by the size of the
}
/* base + base_offset */
- addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
+ addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
base_offset);
vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
tree vptr_type = TREE_TYPE (dataref_ptr);
tree ptr_var = SSA_NAME_VAR (dataref_ptr);
- tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
+ tree update = TYPE_SIZE_UNIT (vectype);
tree incr_stmt;
ssa_op_iter iter;
use_operand_p use_p;
tree new_dataref_ptr;
incr_stmt = build_gimple_modify_stmt (ptr_var,
- build2 (PLUS_EXPR, vptr_type,
+ build2 (POINTER_PLUS_EXPR, vptr_type,
dataref_ptr, update));
new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
+ vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
return true;
}
tree scalar_dest;
tree operation;
tree op, type;
+ tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
tree vectype_out, vectype_in;
+ int nunits_in;
+ int nunits_out;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
enum vect_def_type dt[2];
+ tree new_stmt;
int ncopies, j, nargs;
call_expr_arg_iterator iter;
+ tree vargs;
+ enum { NARROW, NONE, WIDEN } modifier;
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
nargs = 0;
FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
{
- ++nargs;
-
/* Bail out if the function has more than two arguments, we
do not have interesting builtin functions to vectorize with
more than two arguments. */
- if (nargs > 2)
+ if (nargs >= 2)
return false;
/* We can only handle calls with arguments of the same type. */
}
rhs_type = TREE_TYPE (op);
- if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs-1]))
+ if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "use not simple.");
return false;
}
+
+ ++nargs;
}
/* No arguments is also not good. */
return false;
vectype_in = get_vectype_for_scalar_type (rhs_type);
+ nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
vectype_out = get_vectype_for_scalar_type (lhs_type);
+ nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
- /* Only handle the case of vectors with the same number of elements.
- FIXME: We need a way to handle for example the SSE2 cvtpd2dq
- instruction which converts V2DFmode to V4SImode but only
- using the lower half of the V4SImode result. */
- if (TYPE_VECTOR_SUBPARTS (vectype_in) != TYPE_VECTOR_SUBPARTS (vectype_out))
+ /* FORNOW */
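+ /* Classify the call by the ratio of input to output vector lanes; e.g. a
+ conversion from V2DF to V4SI (2 lanes in, 4 lanes out) is a NARROW case. */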
+ if (nunits_in == nunits_out / 2)
+ modifier = NARROW;
+ else if (nunits_out == nunits_in)
+ modifier = NONE;
+ else if (nunits_out == nunits_in / 2)
+ modifier = WIDEN;
+ else
return false;
/* For now, we only vectorize functions if a target specific builtin
gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
+ if (modifier == NARROW)
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
+ else
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+
+ /* Sanity check: make sure that at least one copy of the vectorized stmt
+ needs to be generated. */
+ gcc_assert (ncopies >= 1);
+
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_call ===");
+ vect_model_simple_cost (stmt_info, ncopies);
return true;
}
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform operation.");
- ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
- / TYPE_VECTOR_SUBPARTS (vectype_out));
- gcc_assert (ncopies >= 1);
-
/* Handle def. */
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
prev_stmt_info = NULL;
- for (j = 0; j < ncopies; ++j)
- {
- tree new_stmt, vargs;
- tree vec_oprnd[2];
- int n;
-
- /* Build argument list for the vectorized call. */
- /* FIXME: Rewrite this so that it doesn't construct a temporary
- list. */
- vargs = NULL_TREE;
- n = -1;
- FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
+ switch (modifier)
+ {
+ case NONE:
+ for (j = 0; j < ncopies; ++j)
+ {
+ /* Build argument list for the vectorized call. */
+ /* FIXME: Rewrite this so that it doesn't
+ construct a temporary list. */
+ vargs = NULL_TREE;
+ nargs = 0;
+ FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
+ {
+ if (j == 0)
+ vec_oprnd0
+ = vect_get_vec_def_for_operand (op, stmt, NULL);
+ else
+ vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
+
+ vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
+
+ ++nargs;
+ }
+ vargs = nreverse (vargs);
+
+ rhs = build_function_call_expr (fndecl, vargs);
+ new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
+
+ vect_finish_stmt_generation (stmt, new_stmt, bsi);
+
+ if (j == 0)
+ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
+ else
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
+
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
+ }
+
+ break;
+
+ case NARROW:
+ for (j = 0; j < ncopies; ++j)
{
- ++n;
+ /* Build argument list for the vectorized call. */
+ /* FIXME: Rewrite this so that it doesn't
+ construct a temporary list. */
+ vargs = NULL_TREE;
+ nargs = 0;
+ FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
+ {
+ if (j == 0)
+ {
+ vec_oprnd0
+ = vect_get_vec_def_for_operand (op, stmt, NULL);
+ vec_oprnd1
+ = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
+ }
+ else
+ {
+ vec_oprnd0
+ = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
+ vec_oprnd1
+ = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
+ }
+
+ vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
+ vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
+
+ ++nargs;
+ }
+ vargs = nreverse (vargs);
+
+ rhs = build_function_call_expr (fndecl, vargs);
+ new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
+ new_temp = make_ssa_name (vec_dest, new_stmt);
+ GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
+
+ vect_finish_stmt_generation (stmt, new_stmt, bsi);
if (j == 0)
- vec_oprnd[n] = vect_get_vec_def_for_operand (op, stmt, NULL);
+ STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
else
- vec_oprnd[n] = vect_get_vec_def_for_stmt_copy (dt[n], vec_oprnd[n]);
+ STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- vargs = tree_cons (NULL_TREE, vec_oprnd[n], vargs);
+ prev_stmt_info = vinfo_for_stmt (new_stmt);
}
- vargs = nreverse (vargs);
- rhs = build_function_call_expr (fndecl, vargs);
- new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
- new_temp = make_ssa_name (vec_dest, new_stmt);
- GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
+ *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
- vect_finish_stmt_generation (stmt, new_stmt, bsi);
+ break;
- if (j == 0)
- STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
- else
- STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
- prev_stmt_info = vinfo_for_stmt (new_stmt);
+ case WIDEN:
+ /* No current target implements this case. */
+ return false;
}
- /* The call in STMT might prevent it from being removed in dce. We however
- cannot remove it here, due to the way the ssa name it defines is mapped
- to the new definition. So just replace rhs of the statement with something
- harmless. */
+ /* The call in STMT might prevent it from being removed in dce.
+ We however cannot remove it here, due to the way the ssa name
+ it defines is mapped to the new definition. So just replace
+ rhs of the statement with something harmless. */
type = TREE_TYPE (scalar_dest);
GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
+ update_stmt (stmt);
return true;
}
tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- enum tree_code code, code1 = CODE_FOR_nothing, code2 = CODE_FOR_nothing;
+ enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE;
tree new_temp;
tree def, def_stmt;
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_assignment ===");
+ vect_model_simple_cost (stmt_info, ncopies);
return true;
}
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_induction ===");
+ vect_model_induction_cost (stmt_info, ncopies);
return true;
}
operation = GIMPLE_STMT_OPERAND (stmt, 1);
code = TREE_CODE (operation);
+
+ /* For pointer addition, we should use the normal plus for
+ the vector addition. */
+ if (code == POINTER_PLUS_EXPR)
+ code = PLUS_EXPR;
+
optab = optab_for_tree_code (code, vectype);
/* Support only unary or binary operations. */
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_operation ===");
+ vect_model_simple_cost (stmt_info, ncopies);
return true;
}
tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- enum tree_code code, code1 = CODE_FOR_nothing;
+ enum tree_code code, code1 = ERROR_MARK;
tree new_temp;
tree def, def_stmt;
enum vect_def_type dt0;
int j;
tree expr;
tree vectype_in;
- tree scalar_type;
if (!STMT_VINFO_RELEVANT_P (stmt_info))
return false;
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
- scalar_type = TREE_TYPE (scalar_dest);
- vectype_out = get_vectype_for_scalar_type (scalar_type);
+ vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
if (nunits_in != nunits_out / 2) /* FORNOW */
return false;
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_demotion ===");
+ vect_model_simple_cost (stmt_info, ncopies);
return true;
}
tree vec_oprnd0=NULL, vec_oprnd1=NULL;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
- enum tree_code code, code1 = CODE_FOR_nothing, code2 = CODE_FOR_nothing;
+ enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
tree decl1 = NULL_TREE, decl2 = NULL_TREE;
int op_type;
tree def, def_stmt;
op0 = TREE_OPERAND (operation, 0);
vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
- ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
- gcc_assert (ncopies >= 1);
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
if (nunits_out != nunits_in / 2) /* FORNOW */
return false;
+ ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
+ gcc_assert (ncopies >= 1);
+
if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
&& INTEGRAL_TYPE_P (TREE_TYPE (op0)))
|| (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "=== vectorizable_promotion ===");
+ vect_model_simple_cost (stmt_info, 2*ncopies);
return true;
}
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
+ vect_model_store_cost (stmt_info, ncopies);
return true;
}
/** Transform. **/
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
-
if (strided_store)
{
first_stmt = DR_GROUP_FIRST_DR (stmt_info);
group_size = 1;
}
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
+
dr_chain = VEC_alloc (tree, heap, group_size);
oprnds = VEC_alloc (tree, heap, group_size);
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
+ vect_model_load_cost (stmt_info, ncopies);
return true;
}
- /** Transform. **/
-
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "transform load.");
+ /** Transform. **/
+
if (strided_load)
{
first_stmt = DR_GROUP_FIRST_DR (stmt_info);
init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
loop->num));
- ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
- fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
- fold_convert (TREE_TYPE (init_expr),
- niters),
- step_expr),
- init_expr);
+ if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
+ ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
+ init_expr,
+ fold_convert (sizetype,
+ fold_build2 (MULT_EXPR, TREE_TYPE (niters),
+ niters, step_expr)));
+ else
+ ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
+ fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
+ fold_convert (TREE_TYPE (init_expr),
+ niters),
+ step_expr),
+ init_expr);
+
+
var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
add_referenced_var (var);
basic_block preheader;
int loop_num;
unsigned int th;
+ int min_scalar_loop_bound;
+ int min_profitable_iters;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
&ratio_mult_vf_name, ratio);
loop_num = loop->num;
- /* Threshold for vectorized loop. */
- th = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) *
- LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ /* Analyze cost to set threshold for vectorized loop. */
+ min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
+
+ min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
+ * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
+
+ /* Use the cost model only if it is more conservative than the
+ user-specified threshold. */
+
+ th = (unsigned) min_scalar_loop_bound;
+ if (min_profitable_iters
+ && (!min_scalar_loop_bound
+ || min_profitable_iters > min_scalar_loop_bound))
+ th = (unsigned) min_profitable_iters;
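+
+ /* For example, with --param min-vect-loop-bound=2 and VF = 4 the user
+ threshold is 8 iterations; if the cost model estimates 11, the run-time
+ guard uses 11. */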
+
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vectorization may not be profitable.");
+
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
- ratio_mult_vf_name, ni_name, false, th);
+ ratio_mult_vf_name, ni_name, false,
+ th);
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
/* Create: byte_misalign = addr & (vectype_size - 1) */
byte_misalign =
- fold_build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
+ fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
/* Create: elem_misalign = byte_misalign / element_size */
elem_misalign =