/* Loop Vectorization
- Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
+ Foundation, Inc.
Contributed by Dorit Naishlos <dorit@il.ibm.com>
This file is part of GCC.
I.E., the overall structure is:
loop1_preheader_bb:
- guard1 (goto loop1/merg1_bb)
+ guard1 (goto loop1/merge1_bb)
loop1
loop1_exit_bb:
guard2 (goto merge1_bb/merge2_bb)
In the context of the overall structure, we have:
loop1_preheader_bb:
- guard1 (goto loop1/merg1_bb)
+ guard1 (goto loop1/merge1_bb)
LOOP-> loop1
loop1_exit_bb:
guard2 (goto merge1_bb/merge2_bb)
{
/* Virtual phi; Mark it for renaming. We actually want to call
mark_sym_for_renaming, but since all ssa renaming datastructures
- are going to be freed before we get to call ssa_upate, we just
+ are going to be freed before we get to call ssa_update, we just
record this name for now in a bitmap, and will mark it for
renaming later. */
name = PHI_RESULT (orig_phi);
In the context of the overall structure, we have:
loop1_preheader_bb:
- guard1 (goto loop1/merg1_bb)
+ guard1 (goto loop1/merge1_bb)
loop1
loop1_exit_bb:
guard2 (goto merge1_bb/merge2_bb)
basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
/* A guard that controls whether the second_loop is to be executed or skipped
- is placed in first_loop->exit. first_loopt->exit therefore has two
+ is placed in first_loop->exit. first_loop->exit therefore has two
successors - one is the preheader of second_loop, and the other is a bb
after second_loop.
*/
gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
- /* 1. Verify that one of the successors of first_loopt->exit is the preheader
+ /* 1. Verify that one of the successors of first_loop->exit is the preheader
of second_loop. */
/* The preheader of new_loop is expected to have two predecessors:
|| (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
&& EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
- /* Verify that the other successor of first_loopt->exit is after the
+ /* Verify that the other successor of first_loop->exit is after the
second_loop. */
/* TODO */
}
is false, the caller of this function may want to take care of this
(this can be useful if we don't want new stmts added to first-loop).
- TH: cost model profitability threshold of iterations for vectorization.
- - CHECK_PROFITABILITY: specify whether cost model check has not occured
+ - CHECK_PROFITABILITY: specify whether cost model check has not occurred
during versioning and hence needs to occur during
prologue generation or whether cost model check
- has not occured during prologue generation and hence
+ has not occurred during prologue generation and hence
needs to occur during epilogue generation.
/* 2. Add the guard code in one of the following ways:
2.a Add the guard that controls whether the first loop is executed.
- This occurs when this function is invoked for prologue or epilogiue
+ This occurs when this function is invoked for prologue or epilogue
generation and when the cost model check can be done at compile time.
Resulting CFG would be:
int nunits;
tree vectype;
- if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD)
+ if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
return NULL_TREE;
- /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
+ /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
is expected. */
- nunits = UNITS_PER_SIMD_WORD / nbytes;
+ nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
vectype = build_vector_type (scalar_type, nunits);
if (vect_print_dump_info (REPORT_DETAILS))
&& (!targetm.vectorize.builtin_mask_for_load
|| targetm.vectorize.builtin_mask_for_load ()))
{
- if (nested_in_vect_loop
- && TREE_INT_CST_LOW (DR_STEP (dr)) != UNITS_PER_SIMD_WORD)
- return dr_explicit_realign;
- else
- return dr_explicit_realign_optimized;
+ tree vectype = STMT_VINFO_VECTYPE (stmt_info);
+ if (nested_in_vect_loop
+ && (TREE_INT_CST_LOW (DR_STEP (dr))
+ != GET_MODE_SIZE (TYPE_MODE (vectype))))
+ return dr_explicit_realign;
+ else
+ return dr_explicit_realign_optimized;
}
if (optab_handler (movmisalign_optab, mode)->insn_code !=
/* The result of a vectorized widening operation usually requires two vectors
(because the widened results do not fit in one vector). The generated
vector results would normally be expected to be generated in the same
- order as in the original scalar computation. i.e. if 8 results are
+ order as in the original scalar computation, i.e. if 8 results are
generated in each vector iteration, they are to be organized as follows:
vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
of {mult_even,mult_odd} generate the following vectors:
vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
- When vectorizaing outer-loops, we execute the inner-loop sequentially
+ When vectorizing outer-loops, we execute the inner-loop sequentially
(each vectorized inner-loop iteration contributes to VF outer-loop
iterations in parallel). We therefore don't allow changing the order
of the computation in the inner-loop during outer-loop vectorization. */
if (code == FIX_TRUNC_EXPR)
{
/* The signedness is determined from output operand. */
- optab1 = optab_for_tree_code (c1, type);
- optab2 = optab_for_tree_code (c2, type);
+ optab1 = optab_for_tree_code (c1, type, optab_default);
+ optab2 = optab_for_tree_code (c2, type, optab_default);
}
else
{
- optab1 = optab_for_tree_code (c1, vectype);
- optab2 = optab_for_tree_code (c2, vectype);
+ optab1 = optab_for_tree_code (c1, vectype, optab_default);
+ optab2 = optab_for_tree_code (c2, vectype, optab_default);
}
if (!optab1 || !optab2)
if (code == FIX_TRUNC_EXPR)
/* The signedness is determined from output operand. */
- optab1 = optab_for_tree_code (c1, type);
+ optab1 = optab_for_tree_code (c1, type, optab_default);
else
- optab1 = optab_for_tree_code (c1, vectype);
+ optab1 = optab_for_tree_code (c1, vectype, optab_default);
if (!optab1)
return false;
computation. This may change the behavior of the program in some
cases, so we need to check that this is ok. One exception is when
vectorizing an outer-loop: the inner-loop is executed sequentially,
- and therefore vectorizing reductions in the inner-loop durint
+ and therefore vectorizing reductions in the inner-loop during
outer-loop vectorization is safe. */
/* CHECKME: check for !flag_finite_math_only too? */
}
vect_loop_location = UNKNOWN_LOC;
+ statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)
|| (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
&& num_vectorized_loops > 0))
loop = get_loop (i);
if (!loop)
continue;
- loop_vinfo = loop->aux;
+ loop_vinfo = (loop_vec_info) loop->aux;
destroy_loop_vec_info (loop_vinfo, true);
loop->aux = NULL;
}