/* Transformation Utilities for Loop Vectorization.
- Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
This file is part of GCC.
/* Add additional cost for the peeled instructions in prologue and epilogue
loop.
- FORNOW: If we dont know the value of peel_iters for prologue or epilogue
+ FORNOW: If we don't know the value of peel_iters for prologue or epilogue
at compile-time - we assume it's vf/2 (the worst would be vf-1).
TODO: Build an expression that represents peel_iters for prologue and
branches.
TODO: The back end may reorder the BBS's differently and reverse
- conditions/branch directions. Change the stimates below to
+ conditions/branch directions. Change the estimates below to
something more reasonable. */
if (runtime_test)
int element_bitsize = tree_low_cst (bitsize, 1);
int nelements = vec_size_in_bits / element_bitsize;
- optab = optab_for_tree_code (code, vectype);
+ optab = optab_for_tree_code (code, vectype, optab_default);
/* We have a whole vector shift available. */
if (VECTOR_MODE_P (mode)
else
set_symbol_mem_tag (vect_ptr, tag);
- var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
-
/** Note: If the dataref is in an inner-loop nested in LOOP, and we are
vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
def-use update cycles for the pointer: One relative to the outer-loop
new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
offset, loop);
pe = loop_preheader_edge (loop);
- new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
- gcc_assert (!new_bb);
+ if (new_stmt_list)
+ {
+ new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
+ gcc_assert (!new_bb);
+ }
+
*initial_address = new_temp;
/* Create: p = (vectype *) initial_base */
created vectors. It is greater than 1 if unrolling is performed.
For example, we have two scalar operands, s1 and s2 (e.g., group of
- strided accesses of size two), while NUINTS is four (i.e., four scalars
+ strided accesses of size two), while NUNITS is four (i.e., four scalars
of this type can be packed in a vector). The output vector will contain
two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
will be 2).
If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
containing the operands.
- For example, NUINTS is four as before, and the group size is 8
+ For example, NUNITS is four as before, and the group size is 8
(s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
{s5, s6, s7, s8}. */
/* Make sure bsi points to the stmt that is being vectorized. */
gcc_assert (stmt == bsi_stmt (*bsi));
-#ifdef USE_MAPPED_LOCATION
SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
-#else
- SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
-#endif
}
have_whole_vector_shift = false;
else
{
- optab optab = optab_for_tree_code (code, vectype);
+ optab optab = optab_for_tree_code (code, vectype, optab_default);
if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
have_whole_vector_shift = false;
}
vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
bitsize_zero_node);
- BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
bitpos);
- BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
bitpos = bitsize_zero_node;
rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
- BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
/* 4. Supportable by target? */
/* 4.1. check support for the operation in the loop */
- optab = optab_for_tree_code (code, vectype);
+ optab = optab_for_tree_code (code, vectype, optab_default);
if (!optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
return false;
- reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
+ reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default);
if (!reduc_optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
*vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
}
+ if (vec_oprnds0)
+ VEC_free (tree, heap, vec_oprnds0);
+
return true;
}
VEC(tree,heap) *vec_oprnds = NULL;
tree vop;
+ /* FORNOW: SLP with multiple types is not supported. The SLP analysis
+ verifies this, so we can safely override NCOPIES with 1 here. */
+ if (slp_node)
+ ncopies = 1;
+
gcc_assert (ncopies >= 1);
if (ncopies > 1)
return false; /* FORNOW */
VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
tree vop0, vop1;
unsigned int k;
+ bool shift_p = false;
bool scalar_shift_arg = false;
/* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
if (code == POINTER_PLUS_EXPR)
code = PLUS_EXPR;
- optab = optab_for_tree_code (code, vectype);
-
/* Support only unary or binary operations. */
op_type = TREE_OPERAND_LENGTH (operation);
if (op_type != unary_op && op_type != binary_op)
}
}
+ /* If this is a shift/rotate, determine whether the shift amount is a vector,
+ or scalar. If the shift/rotate amount is a vector, use the vector/vector
+ shift optabs. */
+ if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR
+ || code == RROTATE_EXPR)
+ {
+ shift_p = true;
+
+ /* vector shifted by vector */
+ if (dt[1] == vect_loop_def)
+ {
+ optab = optab_for_tree_code (code, vectype, optab_vector);
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vector/vector shift/rotate found.");
+ }
+
+ /* See if the machine has a vector shifted by scalar insn and if not
+ then see if it has a vector shifted by vector insn */
+ else if (dt[1] == vect_constant_def || dt[1] == vect_invariant_def)
+ {
+ optab = optab_for_tree_code (code, vectype, optab_scalar);
+ if (optab
+ && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
+ != CODE_FOR_nothing))
+ {
+ scalar_shift_arg = true;
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "vector/scalar shift/rotate found.");
+ }
+ else
+ {
+ optab = optab_for_tree_code (code, vectype, optab_vector);
+ if (vect_print_dump_info (REPORT_DETAILS)
+ && optab
+ && (optab_handler (optab, TYPE_MODE (vectype))->insn_code
+ != CODE_FOR_nothing))
+ fprintf (vect_dump, "vector/vector shift/rotate found.");
+ }
+ }
+
+ else
+ {
+ if (vect_print_dump_info (REPORT_DETAILS))
+ fprintf (vect_dump, "operand mode requires invariant argument.");
+ return false;
+ }
+ }
+ else
+ optab = optab_for_tree_code (code, vectype, optab_default);
+
/* Supportable by target? */
if (!optab)
{
return false;
}
- if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
- {
- /* FORNOW: not yet supported. */
- if (!VECTOR_MODE_P (vec_mode))
- return false;
-
- /* Invariant argument is needed for a vector shift
- by a scalar shift operand. */
- optab_op2_mode = insn_data[icode].operand[2].mode;
- if (!VECTOR_MODE_P (optab_op2_mode))
- {
- if (dt[1] != vect_constant_def && dt[1] != vect_invariant_def)
- {
- if (vect_print_dump_info (REPORT_DETAILS))
- fprintf (vect_dump, "operand mode requires invariant"
- " argument.");
- return false;
- }
-
- scalar_shift_arg = true;
- }
- }
-
if (!vec_stmt) /* transformation not required. */
{
STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
/* Handle uses. */
if (j == 0)
{
- if (op_type == binary_op
- && (code == LSHIFT_EXPR || code == RSHIFT_EXPR))
+ if (op_type == binary_op && scalar_shift_arg)
{
/* Vector shl and shr insn patterns can be defined with scalar
operand 2 (shift operand). In this case, use constant or loop
/* Arguments are ready. Create the new vector stmt. We are creating
two vector defs because the widened result does not fit in one vector.
- The vectorized stmt can be expressed as a call to a taregt builtin,
+ The vectorized stmt can be expressed as a call to a target builtin,
or a using a tree-code. */
/* Generate first half of the widened result: */
new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
/* Check that the operation is supported. */
interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
- vectype);
+ vectype, optab_default);
interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
- vectype);
+ vectype, optab_default);
if (!interleave_high_optab || !interleave_low_optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
tree scalar_dest, tmp;
int i;
unsigned int j;
- VEC(tree,heap) *first, *second;
scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
- first = VEC_alloc (tree, heap, length/2);
- second = VEC_alloc (tree, heap, length/2);
/* Check that the operation is supported. */
if (!vect_strided_store_supported (vectype))
}
}
+ VEC_free (tree, heap, dr_chain);
+ VEC_free (tree, heap, oprnds);
+ if (result_chain)
+ VEC_free (tree, heap, result_chain);
+
return true;
}
mode = (int) TYPE_MODE (vectype);
- perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
+ perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
+ optab_default);
if (!perm_even_optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
return false;
}
- perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
+ perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
+ optab_default);
if (!perm_odd_optab)
{
if (vect_print_dump_info (REPORT_DETAILS))
break;
/* Skip the gaps. Loads created for the gaps will be removed by dead
- code elimination pass later.
+ code elimination pass later. No need to check for the first stmt in
+ the group, since it always exists.
DR_GROUP_GAP is the number of steps in elements from the previous
access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
correspond to the gaps.
*/
- if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
+ if (next_stmt != first_stmt
+ && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
{
gap_count++;
continue;
break;
}
}
+
+ VEC_free (tree, heap, result_chain);
return true;
}
nested within an outer-loop that is being vectorized. */
if (nested_in_vect_loop_p (loop, stmt)
- && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
+ && (TREE_INT_CST_LOW (DR_STEP (dr))
+ % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0))
{
gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
compute_in_loop = true;
bitpos = bitsize_zero_node;
vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
bitsize, bitpos);
- BIT_FIELD_REF_UNSIGNED (vec_inv) =
- TYPE_UNSIGNED (scalar_type);
vec_dest =
vect_create_destination_var (scalar_dest, NULL_TREE);
new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
return false;
*vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
+ VEC_free (tree, heap, dr_chain);
dr_chain = VEC_alloc (tree, heap, group_size);
}
else
}
}
+ if (dr_chain)
+ VEC_free (tree, heap, dr_chain);
+
return true;
}
vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
- /* Arguments are ready. create the new vector stmt. */
+ /* Arguments are ready. Create the new vector stmt. */
vec_compare = build2 (TREE_CODE (cond_expr), vectype,
vec_cond_lhs, vec_cond_rhs);
vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
static void
vect_remove_stores (tree first_stmt)
{
- stmt_ann_t ann;
tree next = first_stmt;
tree tmp;
- stmt_vec_info next_stmt_info;
block_stmt_iterator next_si;
while (next)
/* Free the attached stmt_vec_info and remove the stmt. */
next_si = bsi_for_stmt (next);
bsi_remove (&next_si, true);
- next_stmt_info = vinfo_for_stmt (next);
- ann = stmt_ann (next);
- tmp = DR_GROUP_NEXT_DR (next_stmt_info);
- free (next_stmt_info);
- set_stmt_info (ann, NULL);
+ tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
+ free_stmt_vec_info (next);
next = tmp;
}
}
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
int nbbs = loop->num_nodes;
- block_stmt_iterator si, next_si;
+ block_stmt_iterator si;
int i;
tree ratio = NULL;
int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
if (is_store)
{
- stmt_ann_t ann;
if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
{
/* Interleaving. If IS_STORE is TRUE, the vectorization of the
interleaving chain was completed - free all the stores in
the chain. */
- tree next = DR_GROUP_FIRST_DR (stmt_info);
- tree tmp;
- stmt_vec_info next_stmt_info;
-
- while (next)
- {
- next_si = bsi_for_stmt (next);
- next_stmt_info = vinfo_for_stmt (next);
- /* Free the attached stmt_vec_info and remove the stmt. */
- ann = stmt_ann (next);
- tmp = DR_GROUP_NEXT_DR (next_stmt_info);
- free (next_stmt_info);
- set_stmt_info (ann, NULL);
- bsi_remove (&next_si, true);
- next = tmp;
- }
+ vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
bsi_remove (&si, true);
continue;
}
else
{
/* Free the attached stmt_vec_info and remove the stmt. */
- ann = stmt_ann (stmt);
- free (stmt_info);
- set_stmt_info (ann, NULL);
+ free_stmt_vec_info (stmt);
bsi_remove (&si, true);
continue;
}