#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
+#include "params.h"
#include "recog.h"
#include "tree-data-ref.h"
#include "tree-chrec.h"
else
new_vect_var = create_tmp_var (type, prefix);
+ /* Mark a vector-typed variable as a gimple register variable. */
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ DECL_GIMPLE_REG_P (new_vect_var) = true;
+
return new_vect_var;
}
if (!MTAG_P (tag))
new_type_alias (vect_ptr, tag, DR_REF (dr));
else
- var_ann (vect_ptr)->symbol_mem_tag = tag;
+ set_symbol_mem_tag (vect_ptr, tag);
var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
vector stmt (each computing a vector of 'nunits' results, and together
computing 'VF' results in each iteration). This function is called when
vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
- which VF=16 and nuniti=4, so the number of copies required is 4):
+ which VF=16 and nunits=4, so the number of copies required is 4):
scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
return false;
}
- gcc_assert (!stmt_references_memory_p (stmt));
+ gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
for (args = TREE_OPERAND (operation, 1); args; args = TREE_CHAIN (args))
{
ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
gcc_assert (ncopies >= 1);
+
+ if (! INTEGRAL_TYPE_P (scalar_type)
+ || !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+ return false;
/* Check the operands of the operation. */
if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
if (nunits_out != nunits_in / 2) /* FORNOW */
return false;
+ if (! INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
+ || !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
+ return false;
+
/* Check the operands of the operation. */
if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
{
vect1 = VEC_index (tree, dr_chain, j);
vect2 = VEC_index (tree, dr_chain, j+length/2);
- /* high = interleave_high (vect1, vect2); */
+ /* Create interleaving stmt:
+ in the case of big endian:
+ high = interleave_high (vect1, vect2)
+ and in the case of little endian:
+ high = interleave_low (vect1, vect2). */
perm_dest = create_tmp_var (vectype, "vect_inter_high");
+ DECL_GIMPLE_REG_P (perm_dest) = 1;
add_referenced_var (perm_dest);
- perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
- build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1,
- vect2));
+ if (BYTES_BIG_ENDIAN)
+ perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+ build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
+ vect1, vect2));
+ else
+ perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+ build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
+ vect1, vect2));
high = make_ssa_name (perm_dest, perm_stmt);
GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
vect_finish_stmt_generation (stmt, perm_stmt, bsi);
VEC_replace (tree, *result_chain, 2*j, high);
- /* low = interleave_low (vect1, vect2); */
+ /* Create interleaving stmt:
+ in the case of big endian:
+ low = interleave_low (vect1, vect2)
+ and in the case of little endian:
+ low = interleave_high (vect1, vect2). */
perm_dest = create_tmp_var (vectype, "vect_inter_low");
+ DECL_GIMPLE_REG_P (perm_dest) = 1;
add_referenced_var (perm_dest);
- perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
- build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1,
- vect2));
+ if (BYTES_BIG_ENDIAN)
+ perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+ build2 (VEC_INTERLEAVE_LOW_EXPR, vectype,
+ vect1, vect2));
+ else
+ perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
+ build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype,
+ vect1, vect2));
low = make_ssa_name (perm_dest, perm_stmt);
GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
vect_finish_stmt_generation (stmt, perm_stmt, bsi);
S3: &base + 1 = x1
S4: &base + 3 = x3
- We create vectorized storess starting from base address (the access of the
+ We create vectorized stores starting from base address (the access of the
first stmt in the chain (S2 in the above example), when the last store stmt
of the chain (S4) is reached:
as an input to vect_permute_store_chain(), and OPRNDS as an input
to vect_get_vec_def_for_stmt_copy() for the next copy.
If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
- OPRNDS are of size 1.
- */
+ OPRNDS are of size 1. */
next_stmt = first_stmt;
for (i = 0; i < group_size; i++)
{
is the exact number of stmts in the chain. Therefore, NEXT_STMT
can't be NULL_TREE. In case that there is no interleaving,
GROUP_SIZE is 1, and only one iteration of the loop will be
- executed.
- */
+ executed. */
gcc_assert (next_stmt);
op = GIMPLE_STMT_OPERAND (next_stmt, 1);
vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
next copy.
If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
- OPRNDS are of size 1.
- */
+ OPRNDS are of size 1. */
for (i = 0; i < group_size; i++)
{
vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
vec_oprnd);
vect_finish_stmt_generation (stmt, new_stmt, bsi);
- /* Set the V_MAY_DEFS for the vector pointer. If this virtual def has a
- use outside the loop and a loop peel is performed then the def may be
- renamed by the peel. Mark it for renaming so the later use will also
- be renamed. */
+ /* Set the VDEFs for the vector pointer. If this virtual def
+ has a use outside the loop and a loop peel is performed
+ then the def may be renamed by the peel. Mark it for
+ renaming so the later use will also be renamed. */
copy_virtual_operands (new_stmt, next_stmt);
if (j == 0)
{
- /* The original store is deleted so the same SSA_NAMEs can be used.
- */
- FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VMAYDEF)
+ /* The original store is deleted so the same SSA_NAMEs
+ can be used. */
+ FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
{
SSA_NAME_DEF_STMT (def) = new_stmt;
mark_sym_for_renaming (SSA_NAME_VAR (def));
{
/* Create new names for all the definitions created by COPY and
add replacement mappings for each new name. */
- FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VMAYDEF)
+ FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
{
create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
}
prev_stmt_info = vinfo_for_stmt (new_stmt);
- next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
+ next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
if (!next_stmt)
break;
/* Bump the vector pointer. */
/* data_ref = permute_even (first_data_ref, second_data_ref); */
perm_dest = create_tmp_var (vectype, "vect_perm_even");
+ DECL_GIMPLE_REG_P (perm_dest) = 1;
add_referenced_var (perm_dest);
perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
/* data_ref = permute_odd (first_data_ref, second_data_ref); */
perm_dest = create_tmp_var (vectype, "vect_perm_odd");
+ DECL_GIMPLE_REG_P (perm_dest) = 1;
add_referenced_var (perm_dest);
perm_stmt = build2 (GIMPLE_MODIFY_STMT, void_type_node, perm_dest,
LOOP - the loop whose preheader will contain STMT.
It's possible to vectorize a loop even though an SSA_NAME from a VUSE
- appears to be defined in a V_MAY_DEF in another statement in a loop.
+ appears to be defined in a VDEF in another statement in a loop.
One such case is when the VUSE is at the dereference of a __restricted__
- pointer in a load and the V_MAY_DEF is at the dereference of a different
+ pointer in a load and the VDEF is at the dereference of a different
__restricted__ pointer in a store. Vectorization may result in
copy_virtual_uses being called to copy the problematic VUSE to a new
statement that is being inserted in the loop preheader. This procedure
edge update_e;
basic_block preheader;
int loop_num;
+ unsigned int th;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
&ratio_mult_vf_name, ratio);
loop_num = loop->num;
+ /* Threshold for the vectorized loop: min-vect-loop-bound param times VF. */
+ th = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)) *
+ LOOP_VINFO_VECT_FACTOR (loop_vinfo);
new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
- ratio_mult_vf_name, ni_name, false);
+ ratio_mult_vf_name, ni_name, false, th);
gcc_assert (new_loop);
gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
/* Peel the prolog loop and iterate it niters_of_prolog_loop. */
new_loop =
slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
- niters_of_prolog_loop, ni_name, true);
+ niters_of_prolog_loop, ni_name, true, 0);
gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
slpeel_verify_cfg_after_peeling (new_loop, loop);
int i;
tree ratio = NULL;
int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
- bitmap_iterator bi;
- unsigned int j;
bool strided_store;
if (vect_print_dump_info (REPORT_DETAILS))
basic_block new_exit_bb;
edge new_exit_e, e;
tree orig_phi, new_phi, arg;
+ unsigned prob = 4 * REG_BR_PROB_BASE / 5;
cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
&cond_expr_stmt_list);
initialize_original_copy_tables ();
- nloop = loop_version (loop, cond_expr, &condition_bb, true);
+ nloop = loop_version (loop, cond_expr, &condition_bb,
+ prob, prob, REG_BR_PROB_BASE - prob, true);
free_original_copy_tables();
/** Loop versioning violates an assumption we try to maintain during
/* CHECKME: we wouldn't need this if we called update_ssa once
for all loops. */
- bitmap_zero (vect_vnames_to_rename);
+ bitmap_zero (vect_memsyms_to_rename);
/* Peel the loop if there are data refs with unknown alignment.
Only one data ref with unknown store is allowed. */
slpeel_make_loop_iterate_ntimes (loop, ratio);
- EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
- mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
+ mark_set_for_renaming (vect_memsyms_to_rename);
/* The memory tags and pointers in vectorized statements need to
have their SSA forms updated. FIXME, why can't this be delayed