2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
5 Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "tree-pretty-print.h"
31 #include "gimple-pretty-print.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
35 #include "cfglayout.h"
40 #include "diagnostic-core.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
46 /* Loop Vectorization Pass.
48 This pass tries to vectorize loops.
50 For example, the vectorizer transforms the following simple loop:
52 short a[N]; short b[N]; short c[N]; int i;
58 as if it was manually vectorized by rewriting the source code into:
60 typedef int __attribute__((mode(V8HI))) v8hi;
61 short a[N]; short b[N]; short c[N]; int i;
62 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
65 for (i=0; i<N/8; i++){
72 The main entry to this pass is vectorize_loops(), in which
73 the vectorizer applies a set of analyses on a given set of loops,
74 followed by the actual vectorization transformation for the loops that
75 had successfully passed the analysis phase.
76 Throughout this pass we make a distinction between two types of
77 data: scalars (which are represented by SSA_NAMES), and memory references
78 ("data-refs"). These two types of data require different handling both
79 during analysis and transformation. The types of data-refs that the
80 vectorizer currently supports are ARRAY_REFS which base is an array DECL
81 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
82 accesses are required to have a simple (consecutive) access pattern.
86 The driver for the analysis phase is vect_analyze_loop().
87 It applies a set of analyses, some of which rely on the scalar evolution
88 analyzer (scev) developed by Sebastian Pop.
90 During the analysis phase the vectorizer records some information
91 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
92 loop, as well as general information about the loop as a whole, which is
93 recorded in a "loop_vec_info" struct attached to each loop.
97 The loop transformation phase scans all the stmts in the loop, and
98 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
99 the loop that needs to be vectorized. It inserts the vector code sequence
100 just before the scalar stmt S, and records a pointer to the vector code
101 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
102 attached to S). This pointer will be used for the vectorization of following
103 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
104 otherwise, we rely on dead code elimination for removing it.
106 For example, say stmt S1 was vectorized into stmt VS1:
109 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
112 To vectorize stmt S2, the vectorizer first finds the stmt that defines
113 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
114 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
115 resulting sequence would be:
118 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
120 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
122 Operands that are not SSA_NAMEs, are data-refs that appear in
123 load/store operations (like 'x[i]' in S1), and are handled differently.
127 Currently the only target specific information that is used is the
128 size of the vector (in bytes) - "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD".
129 Targets that can support different sizes of vectors, for now will need
130 to specify one value for "TARGET_VECTORIZE_UNITS_PER_SIMD_WORD". More
131 flexibility will be added in the future.
133 Since we only vectorize operations which vector form can be
134 expressed using existing tree codes, to verify that an operation is
135 supported, the vectorizer checks the relevant optab at the relevant
136 machine_mode (e.g, optab_handler (add_optab, V8HImode)). If
137 the value found is CODE_FOR_nothing, then there's no target support, and
138 we can't vectorize the stmt.
140 For additional information on this project see:
141 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
144 /* Function vect_determine_vectorization_factor
146 Determine the vectorization factor (VF). VF is the number of data elements
147 that are operated upon in parallel in a single iteration of the vectorized
148 loop. For example, when vectorizing a loop that operates on 4byte elements,
149 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
150 elements can fit in a single vector register.
152 We currently support vectorization of loops in which all types operated upon
153 are of the same size. Therefore this function currently sets VF according to
154 the size of the types operated upon, and fails if there are multiple sizes
157 VF is also the factor by which the loop iterations are strip-mined, e.g.:
164 for (i=0; i<N; i+=VF){
165 a[i:VF] = b[i:VF] + c[i:VF];
170 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
172 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
173 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
174 int nbbs = loop->num_nodes;
175 gimple_stmt_iterator si;
176 unsigned int vectorization_factor = 0;
181 stmt_vec_info stmt_info;
185 if (vect_print_dump_info (REPORT_DETAILS))
186 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
188 for (i = 0; i < nbbs; i++)
190 basic_block bb = bbs[i];
192 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
195 stmt_info = vinfo_for_stmt (phi);
196 if (vect_print_dump_info (REPORT_DETAILS))
198 fprintf (vect_dump, "==> examining phi: ");
199 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
202 gcc_assert (stmt_info);
204 if (STMT_VINFO_RELEVANT_P (stmt_info))
206 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
207 scalar_type = TREE_TYPE (PHI_RESULT (phi));
209 if (vect_print_dump_info (REPORT_DETAILS))
211 fprintf (vect_dump, "get vectype for scalar type: ");
212 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
215 vectype = get_vectype_for_scalar_type (scalar_type);
218 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
221 "not vectorized: unsupported data-type ");
222 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
226 STMT_VINFO_VECTYPE (stmt_info) = vectype;
228 if (vect_print_dump_info (REPORT_DETAILS))
230 fprintf (vect_dump, "vectype: ");
231 print_generic_expr (vect_dump, vectype, TDF_SLIM);
234 nunits = TYPE_VECTOR_SUBPARTS (vectype);
235 if (vect_print_dump_info (REPORT_DETAILS))
236 fprintf (vect_dump, "nunits = %d", nunits);
238 if (!vectorization_factor
239 || (nunits > vectorization_factor))
240 vectorization_factor = nunits;
244 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
247 gimple stmt = gsi_stmt (si);
248 stmt_info = vinfo_for_stmt (stmt);
250 if (vect_print_dump_info (REPORT_DETAILS))
252 fprintf (vect_dump, "==> examining statement: ");
253 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
256 gcc_assert (stmt_info);
258 /* skip stmts which do not need to be vectorized. */
259 if (!STMT_VINFO_RELEVANT_P (stmt_info)
260 && !STMT_VINFO_LIVE_P (stmt_info))
262 if (vect_print_dump_info (REPORT_DETAILS))
263 fprintf (vect_dump, "skip.");
267 if (gimple_get_lhs (stmt) == NULL_TREE)
269 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
271 fprintf (vect_dump, "not vectorized: irregular stmt.");
272 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
277 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
279 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
281 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
282 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
287 if (STMT_VINFO_VECTYPE (stmt_info))
289 /* The only case when a vectype had been already set is for stmts
290 that contain a dataref, or for "pattern-stmts" (stmts generated
291 by the vectorizer to represent/replace a certain idiom). */
292 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
293 || is_pattern_stmt_p (stmt_info));
294 vectype = STMT_VINFO_VECTYPE (stmt_info);
298 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)
299 && !is_pattern_stmt_p (stmt_info));
301 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
302 if (vect_print_dump_info (REPORT_DETAILS))
304 fprintf (vect_dump, "get vectype for scalar type: ");
305 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
307 vectype = get_vectype_for_scalar_type (scalar_type);
310 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
313 "not vectorized: unsupported data-type ");
314 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
319 STMT_VINFO_VECTYPE (stmt_info) = vectype;
322 /* The vectorization factor is according to the smallest
323 scalar type (or the largest vector size, but we only
324 support one vector size per loop). */
325 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
327 if (vect_print_dump_info (REPORT_DETAILS))
329 fprintf (vect_dump, "get vectype for scalar type: ");
330 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
332 vf_vectype = get_vectype_for_scalar_type (scalar_type);
335 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
338 "not vectorized: unsupported data-type ");
339 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
344 if ((GET_MODE_SIZE (TYPE_MODE (vectype))
345 != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
347 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
350 "not vectorized: different sized vector "
351 "types in statement, ");
352 print_generic_expr (vect_dump, vectype, TDF_SLIM);
353 fprintf (vect_dump, " and ");
354 print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
359 if (vect_print_dump_info (REPORT_DETAILS))
361 fprintf (vect_dump, "vectype: ");
362 print_generic_expr (vect_dump, vf_vectype, TDF_SLIM);
365 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
366 if (vect_print_dump_info (REPORT_DETAILS))
367 fprintf (vect_dump, "nunits = %d", nunits);
369 if (!vectorization_factor
370 || (nunits > vectorization_factor))
371 vectorization_factor = nunits;
375 /* TODO: Analyze cost. Decide if worth while to vectorize. */
376 if (vect_print_dump_info (REPORT_DETAILS))
377 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
378 if (vectorization_factor <= 1)
380 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
381 fprintf (vect_dump, "not vectorized: unsupported data-type");
384 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
390 /* Function vect_is_simple_iv_evolution.
392 FORNOW: A simple evolution of an induction variables in the loop is
393 considered a polynomial evolution with constant step. */
396 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
401 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
403 /* When there is no evolution in this loop, the evolution function
405 if (evolution_part == NULL_TREE)
408 /* When the evolution is a polynomial of degree >= 2
409 the evolution function is not "simple". */
410 if (tree_is_chrec (evolution_part))
413 step_expr = evolution_part;
414 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
416 if (vect_print_dump_info (REPORT_DETAILS))
418 fprintf (vect_dump, "step: ");
419 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
420 fprintf (vect_dump, ", init: ");
421 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
427 if (TREE_CODE (step_expr) != INTEGER_CST)
429 if (vect_print_dump_info (REPORT_DETAILS))
430 fprintf (vect_dump, "step unknown.");
437 /* Function vect_analyze_scalar_cycles_1.
439 Examine the cross iteration def-use cycles of scalar variables
440 in LOOP. LOOP_VINFO represents the loop that is now being
441 considered for vectorization (can be LOOP, or an outer-loop
445 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
447 basic_block bb = loop->header;
449 VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
450 gimple_stmt_iterator gsi;
453 if (vect_print_dump_info (REPORT_DETAILS))
454 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
456 /* First - identify all inductions. Reduction detection assumes that all the
457 inductions have been identified, therefore, this order must not be
459 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
461 gimple phi = gsi_stmt (gsi);
462 tree access_fn = NULL;
463 tree def = PHI_RESULT (phi);
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
466 if (vect_print_dump_info (REPORT_DETAILS))
468 fprintf (vect_dump, "Analyze phi: ");
469 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
472 /* Skip virtual phi's. The data dependences that are associated with
473 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
474 if (!is_gimple_reg (SSA_NAME_VAR (def)))
477 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
479 /* Analyze the evolution function. */
480 access_fn = analyze_scalar_evolution (loop, def);
482 STRIP_NOPS (access_fn);
483 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
485 fprintf (vect_dump, "Access function of PHI: ");
486 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
490 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
492 VEC_safe_push (gimple, heap, worklist, phi);
496 if (vect_print_dump_info (REPORT_DETAILS))
497 fprintf (vect_dump, "Detected induction.");
498 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
502 /* Second - identify all reductions and nested cycles. */
503 while (VEC_length (gimple, worklist) > 0)
505 gimple phi = VEC_pop (gimple, worklist);
506 tree def = PHI_RESULT (phi);
507 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
511 if (vect_print_dump_info (REPORT_DETAILS))
513 fprintf (vect_dump, "Analyze phi: ");
514 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
517 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
518 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
520 nested_cycle = (loop != LOOP_VINFO_LOOP (loop_vinfo));
521 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, !nested_cycle,
527 if (vect_print_dump_info (REPORT_DETAILS))
528 fprintf (vect_dump, "Detected double reduction.");
530 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
531 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
532 vect_double_reduction_def;
538 if (vect_print_dump_info (REPORT_DETAILS))
539 fprintf (vect_dump, "Detected vectorizable nested cycle.");
541 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
542 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
547 if (vect_print_dump_info (REPORT_DETAILS))
548 fprintf (vect_dump, "Detected reduction.");
550 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
551 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
553 /* Store the reduction cycles for possible vectorization in
555 VEC_safe_push (gimple, heap,
556 LOOP_VINFO_REDUCTIONS (loop_vinfo),
562 if (vect_print_dump_info (REPORT_DETAILS))
563 fprintf (vect_dump, "Unknown def-use cycle pattern.");
566 VEC_free (gimple, heap, worklist);
570 /* Function vect_analyze_scalar_cycles.
572 Examine the cross iteration def-use cycles of scalar variables, by
573 analyzing the loop-header PHIs of scalar variables. Classify each
574 cycle as one of the following: invariant, induction, reduction, unknown.
575 We do that for the loop represented by LOOP_VINFO, and also to its
576 inner-loop, if exists.
577 Examples for scalar cycles:
592 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
594 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
596 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
598 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
599 Reductions in such inner-loop therefore have different properties than
600 the reductions in the nest that gets vectorized:
601 1. When vectorized, they are executed in the same order as in the original
602 scalar loop, so we can't change the order of computation when
604 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
605 current checks are too strict. */
608 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
611 /* Function vect_get_loop_niters.
613 Determine how many iterations the loop is executed.
614 If an expression that represents the number of iterations
615 can be constructed, place it in NUMBER_OF_ITERATIONS.
616 Return the loop exit condition. */
619 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
623 if (vect_print_dump_info (REPORT_DETAILS))
624 fprintf (vect_dump, "=== get_loop_niters ===");
626 niters = number_of_exit_cond_executions (loop);
628 if (niters != NULL_TREE
629 && niters != chrec_dont_know)
631 *number_of_iterations = niters;
633 if (vect_print_dump_info (REPORT_DETAILS))
635 fprintf (vect_dump, "==> get_loop_niters:" );
636 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
640 return get_loop_exit_condition (loop);
644 /* Function bb_in_loop_p
646 Used as predicate for dfs order traversal of the loop bbs. */
649 bb_in_loop_p (const_basic_block bb, const void *data)
651 const struct loop *const loop = (const struct loop *)data;
652 if (flow_bb_inside_loop_p (loop, bb))
658 /* Function new_loop_vec_info.
660 Create and initialize a new loop_vec_info struct for LOOP, as well as
661 stmt_vec_info structs for all the stmts in LOOP. */
664 new_loop_vec_info (struct loop *loop)
668 gimple_stmt_iterator si;
669 unsigned int i, nbbs;
671 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
672 LOOP_VINFO_LOOP (res) = loop;
674 bbs = get_loop_body (loop);
676 /* Create/Update stmt_info for all stmts in the loop. */
677 for (i = 0; i < loop->num_nodes; i++)
679 basic_block bb = bbs[i];
681 /* BBs in a nested inner-loop will have been already processed (because
682 we will have called vect_analyze_loop_form for any nested inner-loop).
683 Therefore, for stmts in an inner-loop we just want to update the
684 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
685 loop_info of the outer-loop we are currently considering to vectorize
686 (instead of the loop_info of the inner-loop).
687 For stmts in other BBs we need to create a stmt_info from scratch. */
688 if (bb->loop_father != loop)
691 gcc_assert (loop->inner && bb->loop_father == loop->inner);
692 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
694 gimple phi = gsi_stmt (si);
695 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
696 loop_vec_info inner_loop_vinfo =
697 STMT_VINFO_LOOP_VINFO (stmt_info);
698 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
699 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
701 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
703 gimple stmt = gsi_stmt (si);
704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
705 loop_vec_info inner_loop_vinfo =
706 STMT_VINFO_LOOP_VINFO (stmt_info);
707 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
708 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
713 /* bb in current nest. */
714 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
716 gimple phi = gsi_stmt (si);
717 gimple_set_uid (phi, 0);
718 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res, NULL));
721 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
723 gimple stmt = gsi_stmt (si);
724 gimple_set_uid (stmt, 0);
725 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res, NULL));
730 /* CHECKME: We want to visit all BBs before their successors (except for
731 latch blocks, for which this assertion wouldn't hold). In the simple
732 case of the loop forms we allow, a dfs order of the BBs would the same
733 as reversed postorder traversal, so we are safe. */
736 bbs = XCNEWVEC (basic_block, loop->num_nodes);
737 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
738 bbs, loop->num_nodes, loop);
739 gcc_assert (nbbs == loop->num_nodes);
741 LOOP_VINFO_BBS (res) = bbs;
742 LOOP_VINFO_NITERS (res) = NULL;
743 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
744 LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
745 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
746 LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
747 LOOP_VINFO_VECT_FACTOR (res) = 0;
748 LOOP_VINFO_LOOP_NEST (res) = VEC_alloc (loop_p, heap, 3);
749 LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
750 LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
751 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
752 LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
753 VEC_alloc (gimple, heap,
754 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
755 LOOP_VINFO_MAY_ALIAS_DDRS (res) =
756 VEC_alloc (ddr_p, heap,
757 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
758 LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
759 LOOP_VINFO_REDUCTIONS (res) = VEC_alloc (gimple, heap, 10);
760 LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
761 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
762 LOOP_VINFO_PEELING_HTAB (res) = NULL;
768 /* Function destroy_loop_vec_info.
770 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
771 stmts in the loop. */
774 destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
779 gimple_stmt_iterator si;
781 VEC (slp_instance, heap) *slp_instances;
782 slp_instance instance;
787 loop = LOOP_VINFO_LOOP (loop_vinfo);
789 bbs = LOOP_VINFO_BBS (loop_vinfo);
790 nbbs = loop->num_nodes;
794 free (LOOP_VINFO_BBS (loop_vinfo));
795 free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
796 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
797 VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
798 VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
799 VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
806 for (j = 0; j < nbbs; j++)
808 basic_block bb = bbs[j];
809 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
810 free_stmt_vec_info (gsi_stmt (si));
812 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
814 gimple stmt = gsi_stmt (si);
815 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
819 /* Check if this is a "pattern stmt" (introduced by the
820 vectorizer during the pattern recognition pass). */
821 bool remove_stmt_p = false;
822 gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
825 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
827 && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
828 remove_stmt_p = true;
831 /* Free stmt_vec_info. */
832 free_stmt_vec_info (stmt);
834 /* Remove dead "pattern stmts". */
836 gsi_remove (&si, true);
842 free (LOOP_VINFO_BBS (loop_vinfo));
843 free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
844 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
845 VEC_free (loop_p, heap, LOOP_VINFO_LOOP_NEST (loop_vinfo));
846 VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
847 VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
848 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
849 FOR_EACH_VEC_ELT (slp_instance, slp_instances, j, instance)
850 vect_free_slp_instance (instance);
852 VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
853 VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
854 VEC_free (gimple, heap, LOOP_VINFO_REDUCTIONS (loop_vinfo));
856 if (LOOP_VINFO_PEELING_HTAB (loop_vinfo))
857 htab_delete (LOOP_VINFO_PEELING_HTAB (loop_vinfo));
864 /* Function vect_analyze_loop_1.
866 Apply a set of analyses on LOOP, and create a loop_vec_info struct
867 for it. The different analyses will record information in the
868 loop_vec_info struct. This is a subset of the analyses applied in
869 vect_analyze_loop, to be applied on an inner-loop nested in the loop
870 that is now considered for (outer-loop) vectorization. */
873 vect_analyze_loop_1 (struct loop *loop)
875 loop_vec_info loop_vinfo;
877 if (vect_print_dump_info (REPORT_DETAILS))
878 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
880 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
882 loop_vinfo = vect_analyze_loop_form (loop);
885 if (vect_print_dump_info (REPORT_DETAILS))
886 fprintf (vect_dump, "bad inner-loop form.");
894 /* Function vect_analyze_loop_form.
896 Verify that certain CFG restrictions hold, including:
897 - the loop has a pre-header
898 - the loop has a single entry and exit
899 - the loop exit condition is simple enough, and the number of iterations
900 can be analyzed (a countable loop). */
903 vect_analyze_loop_form (struct loop *loop)
905 loop_vec_info loop_vinfo;
907 tree number_of_iterations = NULL;
908 loop_vec_info inner_loop_vinfo = NULL;
910 if (vect_print_dump_info (REPORT_DETAILS))
911 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
913 /* Different restrictions apply when we are considering an inner-most loop,
914 vs. an outer (nested) loop.
915 (FORNOW. May want to relax some of these restrictions in the future). */
919 /* Inner-most loop. We currently require that the number of BBs is
920 exactly 2 (the header and latch). Vectorizable inner-most loops
931 if (loop->num_nodes != 2)
933 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
934 fprintf (vect_dump, "not vectorized: control flow in loop.");
938 if (empty_block_p (loop->header))
940 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
941 fprintf (vect_dump, "not vectorized: empty loop.");
947 struct loop *innerloop = loop->inner;
950 /* Nested loop. We currently require that the loop is doubly-nested,
951 contains a single inner loop, and the number of BBs is exactly 5.
952 Vectorizable outer-loops look like this:
964 The inner-loop has the properties expected of inner-most loops
965 as described above. */
967 if ((loop->inner)->inner || (loop->inner)->next)
969 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
970 fprintf (vect_dump, "not vectorized: multiple nested loops.");
974 /* Analyze the inner-loop. */
975 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
976 if (!inner_loop_vinfo)
978 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
979 fprintf (vect_dump, "not vectorized: Bad inner loop.");
983 if (!expr_invariant_in_loop_p (loop,
984 LOOP_VINFO_NITERS (inner_loop_vinfo)))
986 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
988 "not vectorized: inner-loop count not invariant.");
989 destroy_loop_vec_info (inner_loop_vinfo, true);
993 if (loop->num_nodes != 5)
995 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
996 fprintf (vect_dump, "not vectorized: control flow in loop.");
997 destroy_loop_vec_info (inner_loop_vinfo, true);
1001 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
1002 entryedge = EDGE_PRED (innerloop->header, 0);
1003 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
1004 entryedge = EDGE_PRED (innerloop->header, 1);
1006 if (entryedge->src != loop->header
1007 || !single_exit (innerloop)
1008 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1010 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1011 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
1012 destroy_loop_vec_info (inner_loop_vinfo, true);
1016 if (vect_print_dump_info (REPORT_DETAILS))
1017 fprintf (vect_dump, "Considering outer-loop vectorization.");
1020 if (!single_exit (loop)
1021 || EDGE_COUNT (loop->header->preds) != 2)
1023 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1025 if (!single_exit (loop))
1026 fprintf (vect_dump, "not vectorized: multiple exits.");
1027 else if (EDGE_COUNT (loop->header->preds) != 2)
1028 fprintf (vect_dump, "not vectorized: too many incoming edges.");
1030 if (inner_loop_vinfo)
1031 destroy_loop_vec_info (inner_loop_vinfo, true);
1035 /* We assume that the loop exit condition is at the end of the loop. i.e,
1036 that the loop is represented as a do-while (with a proper if-guard
1037 before the loop if needed), where the loop header contains all the
1038 executable statements, and the latch is empty. */
1039 if (!empty_block_p (loop->latch)
1040 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1042 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1043 fprintf (vect_dump, "not vectorized: unexpected loop form.");
1044 if (inner_loop_vinfo)
1045 destroy_loop_vec_info (inner_loop_vinfo, true);
1049 /* Make sure there exists a single-predecessor exit bb: */
1050 if (!single_pred_p (single_exit (loop)->dest))
1052 edge e = single_exit (loop);
1053 if (!(e->flags & EDGE_ABNORMAL))
1055 split_loop_exit_edge (e);
1056 if (vect_print_dump_info (REPORT_DETAILS))
1057 fprintf (vect_dump, "split exit edge.");
1061 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1062 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1063 if (inner_loop_vinfo)
1064 destroy_loop_vec_info (inner_loop_vinfo, true);
1069 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1072 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1073 fprintf (vect_dump, "not vectorized: complicated exit condition.");
1074 if (inner_loop_vinfo)
1075 destroy_loop_vec_info (inner_loop_vinfo, true);
1079 if (!number_of_iterations)
1081 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1083 "not vectorized: number of iterations cannot be computed.");
1084 if (inner_loop_vinfo)
1085 destroy_loop_vec_info (inner_loop_vinfo, true);
1089 if (chrec_contains_undetermined (number_of_iterations))
1091 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1092 fprintf (vect_dump, "Infinite number of iterations.");
1093 if (inner_loop_vinfo)
1094 destroy_loop_vec_info (inner_loop_vinfo, true);
1098 if (!NITERS_KNOWN_P (number_of_iterations))
1100 if (vect_print_dump_info (REPORT_DETAILS))
1102 fprintf (vect_dump, "Symbolic number of iterations is ");
1103 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1106 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
1108 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1109 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1110 if (inner_loop_vinfo)
1111 destroy_loop_vec_info (inner_loop_vinfo, false);
1115 loop_vinfo = new_loop_vec_info (loop);
1116 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1117 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1119 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
1121 /* CHECKME: May want to keep it around it in the future. */
1122 if (inner_loop_vinfo)
1123 destroy_loop_vec_info (inner_loop_vinfo, false);
1125 gcc_assert (!loop->aux);
1126 loop->aux = loop_vinfo;
1131 /* Get cost by calling cost target builtin. */
1134 vect_get_cost (enum vect_cost_for_stmt type_of_cost)
1136 tree dummy_type = NULL;
1139 return targetm.vectorize.builtin_vectorization_cost (type_of_cost,
1144 /* Function vect_analyze_loop_operations.
1146 Scan the loop stmts and make sure they are all vectorizable. */
1149 vect_analyze_loop_operations (loop_vec_info loop_vinfo, bool slp)
1151 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1152 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1153 int nbbs = loop->num_nodes;
1154 gimple_stmt_iterator si;
1155 unsigned int vectorization_factor = 0;
1158 stmt_vec_info stmt_info;
1159 bool need_to_vectorize = false;
1160 int min_profitable_iters;
1161 int min_scalar_loop_bound;
1163 bool only_slp_in_loop = true, ok;
1165 if (vect_print_dump_info (REPORT_DETAILS))
1166 fprintf (vect_dump, "=== vect_analyze_loop_operations ===");
1168 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1169 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1172 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1173 vectorization factor of the loop is the unrolling factor required by
1174 the SLP instances. If that unrolling factor is 1, we say, that we
1175 perform pure SLP on loop - cross iteration parallelism is not
1177 for (i = 0; i < nbbs; i++)
1179 basic_block bb = bbs[i];
1180 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1182 gimple stmt = gsi_stmt (si);
1183 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1184 gcc_assert (stmt_info);
1185 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1186 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1187 && !PURE_SLP_STMT (stmt_info))
1188 /* STMT needs both SLP and loop-based vectorization. */
1189 only_slp_in_loop = false;
1193 if (only_slp_in_loop)
1194 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
1196 vectorization_factor = least_common_multiple (vectorization_factor,
1197 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1199 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1200 if (vect_print_dump_info (REPORT_DETAILS))
1201 fprintf (vect_dump, "Updating vectorization factor to %d ",
1202 vectorization_factor);
1205 for (i = 0; i < nbbs; i++)
1207 basic_block bb = bbs[i];
1209 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1211 phi = gsi_stmt (si);
1214 stmt_info = vinfo_for_stmt (phi);
1215 if (vect_print_dump_info (REPORT_DETAILS))
1217 fprintf (vect_dump, "examining phi: ");
1218 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1221 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1222 (i.e., a phi in the tail of the outer-loop). */
1223 if (! is_loop_header_bb_p (bb))
1225 /* FORNOW: we currently don't support the case that these phis
1226 are not used in the outerloop (unless it is double reduction,
1227 i.e., this phi is vect_reduction_def), cause this case
1228 requires to actually do something here. */
1229 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
1230 || STMT_VINFO_LIVE_P (stmt_info))
1231 && STMT_VINFO_DEF_TYPE (stmt_info)
1232 != vect_double_reduction_def)
1234 if (vect_print_dump_info (REPORT_DETAILS))
1236 "Unsupported loop-closed phi in outer-loop.");
1240 /* If PHI is used in the outer loop, we check that its operand
1241 is defined in the inner loop. */
1242 if (STMT_VINFO_RELEVANT_P (stmt_info))
1247 if (gimple_phi_num_args (phi) != 1)
1250 phi_op = PHI_ARG_DEF (phi, 0);
1251 if (TREE_CODE (phi_op) != SSA_NAME)
1254 op_def_stmt = SSA_NAME_DEF_STMT (phi_op);
1255 if (!op_def_stmt || !vinfo_for_stmt (op_def_stmt))
1258 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1259 != vect_used_in_outer
1260 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1261 != vect_used_in_outer_by_reduction)
1268 gcc_assert (stmt_info);
1270 if (STMT_VINFO_LIVE_P (stmt_info))
1272 /* FORNOW: not yet supported. */
1273 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1274 fprintf (vect_dump, "not vectorized: value used after loop.");
1278 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1279 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1281 /* A scalar-dependence cycle that we don't support. */
1282 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1283 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
1287 if (STMT_VINFO_RELEVANT_P (stmt_info))
1289 need_to_vectorize = true;
1290 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
1291 ok = vectorizable_induction (phi, NULL, NULL);
1296 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1299 "not vectorized: relevant phi not supported: ");
1300 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
1306 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1308 gimple stmt = gsi_stmt (si);
1309 if (!vect_analyze_stmt (stmt, &need_to_vectorize, NULL))
1314 /* All operations in the loop are either irrelevant (deal with loop
1315 control, or dead), or only used outside the loop and can be moved
1316 out of the loop (e.g. invariants, inductions). The loop can be
1317 optimized away by scalar optimizations. We're better off not
1318 touching this loop. */
1319 if (!need_to_vectorize)
1321 if (vect_print_dump_info (REPORT_DETAILS))
1323 "All the computation can be taken out of the loop.");
1324 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1326 "not vectorized: redundant loop. no profit to vectorize.");
1330 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1331 && vect_print_dump_info (REPORT_DETAILS))
1333 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
1334 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
1336 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1337 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1339 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1340 fprintf (vect_dump, "not vectorized: iteration count too small.");
1341 if (vect_print_dump_info (REPORT_DETAILS))
1342 fprintf (vect_dump,"not vectorized: iteration count smaller than "
1343 "vectorization factor.");
1347 /* Analyze cost. Decide if worth while to vectorize. */
1349 /* Once VF is set, SLP costs should be updated since the number of created
1350 vector stmts depends on VF. */
1351 vect_update_slp_costs_according_to_vf (loop_vinfo);
1353 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
1354 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
1356 if (min_profitable_iters < 0)
1358 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1359 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
1360 if (vect_print_dump_info (REPORT_DETAILS))
1361 fprintf (vect_dump, "not vectorized: vector version will never be "
1366 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1367 * vectorization_factor) - 1);
1369 /* Use the cost model only if it is more conservative than user specified
1372 th = (unsigned) min_scalar_loop_bound;
1373 if (min_profitable_iters
1374 && (!min_scalar_loop_bound
1375 || min_profitable_iters > min_scalar_loop_bound))
1376 th = (unsigned) min_profitable_iters;
1378 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1379 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
1381 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1382 fprintf (vect_dump, "not vectorized: vectorization not "
1384 if (vect_print_dump_info (REPORT_DETAILS))
1385 fprintf (vect_dump, "not vectorized: iteration count smaller than "
1386 "user specified loop bound parameter or minimum "
1387 "profitable iterations (whichever is more conservative).");
1391 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1392 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
1393 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1395 if (vect_print_dump_info (REPORT_DETAILS))
1396 fprintf (vect_dump, "epilog loop required.");
1397 if (!vect_can_advance_ivs_p (loop_vinfo))
1399 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1401 "not vectorized: can't create epilog loop 1.");
1404 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1406 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1408 "not vectorized: can't create epilog loop 2.");
1417 /* Function vect_analyze_loop_2.
1419 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1420 for it. The different analyses will record information in the
1421 loop_vec_info struct. */
1423 vect_analyze_loop_2 (loop_vec_info loop_vinfo)
1425 bool ok, dummy, slp = false;
1426 int max_vf = MAX_VECTORIZATION_FACTOR;
1429 /* Find all data references in the loop (which correspond to vdefs/vuses)
1430 and analyze their evolution in the loop. Also adjust the minimal
1431 vectorization factor according to the loads and stores.
1433 FORNOW: Handle only simple, array references, which
1434 alignment can be forced, and aligned pointer-references. */
1436 ok = vect_analyze_data_refs (loop_vinfo, NULL, &min_vf);
1439 if (vect_print_dump_info (REPORT_DETAILS))
1440 fprintf (vect_dump, "bad data references.");
1444 /* Classify all cross-iteration scalar data-flow cycles.
1445 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1447 vect_analyze_scalar_cycles (loop_vinfo);
1449 vect_pattern_recog (loop_vinfo);
1451 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1453 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1456 if (vect_print_dump_info (REPORT_DETAILS))
1457 fprintf (vect_dump, "unexpected pattern.");
1461 /* Analyze data dependences between the data-refs in the loop
1462 and adjust the maximum vectorization factor according to
1464 FORNOW: fail at the first data dependence that we encounter. */
1466 ok = vect_analyze_data_ref_dependences (loop_vinfo, NULL, &max_vf, &dummy);
1470 if (vect_print_dump_info (REPORT_DETAILS))
1471 fprintf (vect_dump, "bad data dependence.");
1475 ok = vect_determine_vectorization_factor (loop_vinfo);
1478 if (vect_print_dump_info (REPORT_DETAILS))
1479 fprintf (vect_dump, "can't determine vectorization factor.");
1482 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo))
1484 if (vect_print_dump_info (REPORT_DETAILS))
1485 fprintf (vect_dump, "bad data dependence.");
1489 /* Analyze the alignment of the data-refs in the loop.
1490 Fail if a data reference is found that cannot be vectorized. */
1492 ok = vect_analyze_data_refs_alignment (loop_vinfo, NULL);
1495 if (vect_print_dump_info (REPORT_DETAILS))
1496 fprintf (vect_dump, "bad data alignment.");
1500 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1501 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1503 ok = vect_analyze_data_ref_accesses (loop_vinfo, NULL);
1506 if (vect_print_dump_info (REPORT_DETAILS))
1507 fprintf (vect_dump, "bad data access.");
1511 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
1512 It is important to call pruning after vect_analyze_data_ref_accesses,
1513 since we use grouping information gathered by interleaving analysis. */
1514 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
1517 if (vect_print_dump_info (REPORT_DETAILS))
1518 fprintf (vect_dump, "too long list of versioning for alias "
1523 /* This pass will decide on using loop versioning and/or loop peeling in
1524 order to enhance the alignment of data references in the loop. */
1526 ok = vect_enhance_data_refs_alignment (loop_vinfo);
1529 if (vect_print_dump_info (REPORT_DETAILS))
1530 fprintf (vect_dump, "bad data alignment.");
1534 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1535 ok = vect_analyze_slp (loop_vinfo, NULL);
1538 /* Decide which possible SLP instances to SLP. */
1539 slp = vect_make_slp_decision (loop_vinfo);
1541 /* Find stmts that need to be both vectorized and SLPed. */
1542 vect_detect_hybrid_slp (loop_vinfo);
1545 /* Scan all the operations in the loop and make sure they are
1548 ok = vect_analyze_loop_operations (loop_vinfo, slp);
1551 if (vect_print_dump_info (REPORT_DETAILS))
1552 fprintf (vect_dump, "bad operation or unsupported loop bound.");
1559 /* Function vect_analyze_loop.
1561 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1562 for it. The different analyses will record information in the
1563 loop_vec_info struct. */
1565 vect_analyze_loop (struct loop *loop)
1567 loop_vec_info loop_vinfo;
1568 unsigned int vector_sizes;
1570 /* Autodetect first vector size we try. */
1571 current_vector_size = 0;
1572 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
1574 if (vect_print_dump_info (REPORT_DETAILS))
1575 fprintf (vect_dump, "===== analyze_loop_nest =====");
1577 if (loop_outer (loop)
1578 && loop_vec_info_for_loop (loop_outer (loop))
1579 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
1581 if (vect_print_dump_info (REPORT_DETAILS))
1582 fprintf (vect_dump, "outer-loop already vectorized.");
1588 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
1589 loop_vinfo = vect_analyze_loop_form (loop);
1592 if (vect_print_dump_info (REPORT_DETAILS))
1593 fprintf (vect_dump, "bad loop form.");
1597 if (vect_analyze_loop_2 (loop_vinfo))
1599 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
1604 destroy_loop_vec_info (loop_vinfo, true);
1606 vector_sizes &= ~current_vector_size;
1607 if (vector_sizes == 0
1608 || current_vector_size == 0)
1611 /* Try the next biggest vector size. */
1612 current_vector_size = 1 << floor_log2 (vector_sizes);
1613 if (vect_print_dump_info (REPORT_DETAILS))
1614 fprintf (vect_dump, "***** Re-trying analysis with "
1615 "vector size %d\n", current_vector_size);
1620 /* Function reduction_code_for_scalar_code
1623 CODE - tree_code of a reduction operations.
1626 REDUC_CODE - the corresponding tree-code to be used to reduce the
1627 vector of partial results into a single scalar result (which
1628 will also reside in a vector) or ERROR_MARK if the operation is
1629 a supported reduction operation, but does not have such tree-code.
1631 Return FALSE if CODE currently cannot be vectorized as reduction. */
1634 reduction_code_for_scalar_code (enum tree_code code,
1635 enum tree_code *reduc_code)
1640 *reduc_code = REDUC_MAX_EXPR;
1644 *reduc_code = REDUC_MIN_EXPR;
1648 *reduc_code = REDUC_PLUS_EXPR;
1656 *reduc_code = ERROR_MARK;
1665 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
1666 STMT is printed with a message MSG. */
1669 report_vect_op (gimple stmt, const char *msg)
1671 fprintf (vect_dump, "%s", msg);
1672 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1676 /* Function vect_is_simple_reduction_1
1678 (1) Detect a cross-iteration def-use cycle that represents a simple
1679 reduction computation. We look for the following pattern:
1684 a2 = operation (a3, a1)
1687 1. operation is commutative and associative and it is safe to
1688 change the order of the computation (if CHECK_REDUCTION is true)
1689 2. no uses for a2 in the loop (a2 is used out of the loop)
1690 3. no uses of a1 in the loop besides the reduction operation
1691 4. no uses of a1 outside the loop.
1693 Conditions 1,4 are tested here.
1694 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.
1696 (2) Detect a cross-iteration def-use cycle in nested loops, i.e.,
1697 nested cycles, if CHECK_REDUCTION is false.
1699 (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double
1703 inner loop (def of a3)
1706 If MODIFY is true it tries also to rework the code in-place to enable
1707 detection of more reduction patterns. For the time being we rewrite
1708 "res -= RHS" into "rhs += -RHS" when it seems worthwhile.
1712 vect_is_simple_reduction_1 (loop_vec_info loop_info, gimple phi,
1713 bool check_reduction, bool *double_reduc,
1716 struct loop *loop = (gimple_bb (phi))->loop_father;
1717 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
1718 edge latch_e = loop_latch_edge (loop);
1719 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
1720 gimple def_stmt, def1 = NULL, def2 = NULL;
1721 enum tree_code orig_code, code;
1722 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
1726 imm_use_iterator imm_iter;
1727 use_operand_p use_p;
1730 *double_reduc = false;
1732 /* If CHECK_REDUCTION is true, we assume inner-most loop vectorization,
1733 otherwise, we assume outer loop vectorization. */
1734 gcc_assert ((check_reduction && loop == vect_loop)
1735 || (!check_reduction && flow_loop_nested_p (vect_loop, loop)));
1737 name = PHI_RESULT (phi);
1739 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
1741 gimple use_stmt = USE_STMT (use_p);
1742 if (is_gimple_debug (use_stmt))
1745 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1747 if (vect_print_dump_info (REPORT_DETAILS))
1748 fprintf (vect_dump, "intermediate value used outside loop.");
1753 if (vinfo_for_stmt (use_stmt)
1754 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
1758 if (vect_print_dump_info (REPORT_DETAILS))
1759 fprintf (vect_dump, "reduction used in loop.");
1764 if (TREE_CODE (loop_arg) != SSA_NAME)
1766 if (vect_print_dump_info (REPORT_DETAILS))
1768 fprintf (vect_dump, "reduction: not ssa_name: ");
1769 print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
1774 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
1777 if (vect_print_dump_info (REPORT_DETAILS))
1778 fprintf (vect_dump, "reduction: no def_stmt.");
1782 if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI)
1784 if (vect_print_dump_info (REPORT_DETAILS))
1785 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1789 if (is_gimple_assign (def_stmt))
1791 name = gimple_assign_lhs (def_stmt);
1796 name = PHI_RESULT (def_stmt);
1801 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
1803 gimple use_stmt = USE_STMT (use_p);
1804 if (is_gimple_debug (use_stmt))
1806 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
1807 && vinfo_for_stmt (use_stmt)
1808 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
1812 if (vect_print_dump_info (REPORT_DETAILS))
1813 fprintf (vect_dump, "reduction used in loop.");
1818 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
1819 defined in the inner loop. */
1822 op1 = PHI_ARG_DEF (def_stmt, 0);
1824 if (gimple_phi_num_args (def_stmt) != 1
1825 || TREE_CODE (op1) != SSA_NAME)
1827 if (vect_print_dump_info (REPORT_DETAILS))
1828 fprintf (vect_dump, "unsupported phi node definition.");
1833 def1 = SSA_NAME_DEF_STMT (op1);
1834 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
1836 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
1837 && is_gimple_assign (def1))
1839 if (vect_print_dump_info (REPORT_DETAILS))
1840 report_vect_op (def_stmt, "detected double reduction: ");
1842 *double_reduc = true;
1849 code = orig_code = gimple_assign_rhs_code (def_stmt);
1851 /* We can handle "res -= x[i]", which is non-associative by
1852 simply rewriting this into "res += -x[i]". Avoid changing
1853 gimple instruction for the first simple tests and only do this
1854 if we're allowed to change code at all. */
1855 if (code == MINUS_EXPR
1857 && (op1 = gimple_assign_rhs1 (def_stmt))
1858 && TREE_CODE (op1) == SSA_NAME
1859 && SSA_NAME_DEF_STMT (op1) == phi)
1863 && (!commutative_tree_code (code) || !associative_tree_code (code)))
1865 if (vect_print_dump_info (REPORT_DETAILS))
1866 report_vect_op (def_stmt, "reduction: not commutative/associative: ");
1870 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
1872 if (code != COND_EXPR)
1874 if (vect_print_dump_info (REPORT_DETAILS))
1875 report_vect_op (def_stmt, "reduction: not binary operation: ");
1880 op3 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
1881 if (COMPARISON_CLASS_P (op3))
1883 op4 = TREE_OPERAND (op3, 1);
1884 op3 = TREE_OPERAND (op3, 0);
1887 op1 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 1);
1888 op2 = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 2);
1890 if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME)
1892 if (vect_print_dump_info (REPORT_DETAILS))
1893 report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
1900 op1 = gimple_assign_rhs1 (def_stmt);
1901 op2 = gimple_assign_rhs2 (def_stmt);
1903 if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1905 if (vect_print_dump_info (REPORT_DETAILS))
1906 report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
1912 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
1913 if ((TREE_CODE (op1) == SSA_NAME
1914 && !types_compatible_p (type,TREE_TYPE (op1)))
1915 || (TREE_CODE (op2) == SSA_NAME
1916 && !types_compatible_p (type, TREE_TYPE (op2)))
1917 || (op3 && TREE_CODE (op3) == SSA_NAME
1918 && !types_compatible_p (type, TREE_TYPE (op3)))
1919 || (op4 && TREE_CODE (op4) == SSA_NAME
1920 && !types_compatible_p (type, TREE_TYPE (op4))))
1922 if (vect_print_dump_info (REPORT_DETAILS))
1924 fprintf (vect_dump, "reduction: multiple types: operation type: ");
1925 print_generic_expr (vect_dump, type, TDF_SLIM);
1926 fprintf (vect_dump, ", operands types: ");
1927 print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
1928 fprintf (vect_dump, ",");
1929 print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
1932 fprintf (vect_dump, ",");
1933 print_generic_expr (vect_dump, TREE_TYPE (op3), TDF_SLIM);
1938 fprintf (vect_dump, ",");
1939 print_generic_expr (vect_dump, TREE_TYPE (op4), TDF_SLIM);
1946 /* Check that it's ok to change the order of the computation.
1947 Generally, when vectorizing a reduction we change the order of the
1948 computation. This may change the behavior of the program in some
1949 cases, so we need to check that this is ok. One exception is when
1950 vectorizing an outer-loop: the inner-loop is executed sequentially,
1951 and therefore vectorizing reductions in the inner-loop during
1952 outer-loop vectorization is safe. */
1954 /* CHECKME: check for !flag_finite_math_only too? */
1955 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
1958 /* Changing the order of operations changes the semantics. */
1959 if (vect_print_dump_info (REPORT_DETAILS))
1960 report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
1963 else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
1966 /* Changing the order of operations changes the semantics. */
1967 if (vect_print_dump_info (REPORT_DETAILS))
1968 report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
1971 else if (SAT_FIXED_POINT_TYPE_P (type) && check_reduction)
1973 /* Changing the order of operations changes the semantics. */
1974 if (vect_print_dump_info (REPORT_DETAILS))
1975 report_vect_op (def_stmt,
1976 "reduction: unsafe fixed-point math optimization: ");
1980 /* If we detected "res -= x[i]" earlier, rewrite it into
1981 "res += -x[i]" now. If this turns out to be useless reassoc
1982 will clean it up again. */
1983 if (orig_code == MINUS_EXPR)
1985 tree rhs = gimple_assign_rhs2 (def_stmt);
1986 tree negrhs = make_ssa_name (SSA_NAME_VAR (rhs), NULL);
1987 gimple negate_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, negrhs,
1989 gimple_stmt_iterator gsi = gsi_for_stmt (def_stmt);
1990 set_vinfo_for_stmt (negate_stmt, new_stmt_vec_info (negate_stmt,
1992 gsi_insert_before (&gsi, negate_stmt, GSI_NEW_STMT);
1993 gimple_assign_set_rhs2 (def_stmt, negrhs);
1994 gimple_assign_set_rhs_code (def_stmt, PLUS_EXPR);
1995 update_stmt (def_stmt);
1998 /* Reduction is safe. We're dealing with one of the following:
1999 1) integer arithmetic and no trapv
2000 2) floating point arithmetic, and special flags permit this optimization
2001 3) nested cycle (i.e., outer loop vectorization). */
2002 if (TREE_CODE (op1) == SSA_NAME)
2003 def1 = SSA_NAME_DEF_STMT (op1);
2005 if (TREE_CODE (op2) == SSA_NAME)
2006 def2 = SSA_NAME_DEF_STMT (op2);
2008 if (code != COND_EXPR
2009 && (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2)))
2011 if (vect_print_dump_info (REPORT_DETAILS))
2012 report_vect_op (def_stmt, "reduction: no defs for operands: ");
2016 /* Check that one def is the reduction def, defined by PHI,
2017 the other def is either defined in the loop ("vect_internal_def"),
2018 or it's an induction (defined by a loop-header phi-node). */
2020 if (def2 && def2 == phi
2021 && (code == COND_EXPR
2022 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2023 && (is_gimple_assign (def1)
2024 || is_gimple_call (def1)
2025 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2026 == vect_induction_def
2027 || (gimple_code (def1) == GIMPLE_PHI
2028 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
2029 == vect_internal_def
2030 && !is_loop_header_bb_p (gimple_bb (def1)))))))
2032 if (vect_print_dump_info (REPORT_DETAILS))
2033 report_vect_op (def_stmt, "detected reduction: ");
2036 else if (def1 && def1 == phi
2037 && (code == COND_EXPR
2038 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2039 && (is_gimple_assign (def2)
2040 || is_gimple_call (def2)
2041 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2042 == vect_induction_def
2043 || (gimple_code (def2) == GIMPLE_PHI
2044 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
2045 == vect_internal_def
2046 && !is_loop_header_bb_p (gimple_bb (def2)))))))
2048 if (check_reduction)
2050 /* Swap operands (just for simplicity - so that the rest of the code
2051 can assume that the reduction variable is always the last (second)
2053 if (vect_print_dump_info (REPORT_DETAILS))
2054 report_vect_op (def_stmt,
2055 "detected reduction: need to swap operands: ");
2057 swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2058 gimple_assign_rhs2_ptr (def_stmt));
2062 if (vect_print_dump_info (REPORT_DETAILS))
2063 report_vect_op (def_stmt, "detected reduction: ");
2070 if (vect_print_dump_info (REPORT_DETAILS))
2071 report_vect_op (def_stmt, "reduction: unknown pattern: ");
2077 /* Wrapper around vect_is_simple_reduction_1, that won't modify code
2078 in-place. Arguments as there. */
2081 vect_is_simple_reduction (loop_vec_info loop_info, gimple phi,
2082 bool check_reduction, bool *double_reduc)
2084 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2085 double_reduc, false);
2088 /* Wrapper around vect_is_simple_reduction_1, which will modify code
2089 in-place if it enables detection of more reductions. Arguments
2093 vect_force_simple_reduction (loop_vec_info loop_info, gimple phi,
2094 bool check_reduction, bool *double_reduc)
2096 return vect_is_simple_reduction_1 (loop_info, phi, check_reduction,
2097 double_reduc, true);
2100 /* Calculate the cost of one scalar iteration of the loop. */
2102 vect_get_single_scalar_iteraion_cost (loop_vec_info loop_vinfo)
2104 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2105 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2106 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0;
2107 int innerloop_iters, i, stmt_cost;
2109 /* Count statements in scalar loop. Using this as scalar cost for a single
2112 TODO: Add outer loop support.
2114 TODO: Consider assigning different costs to different scalar
2118 innerloop_iters = 1;
2120 innerloop_iters = 50; /* FIXME */
2122 for (i = 0; i < nbbs; i++)
2124 gimple_stmt_iterator si;
2125 basic_block bb = bbs[i];
2127 if (bb->loop_father == loop->inner)
2128 factor = innerloop_iters;
2132 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2134 gimple stmt = gsi_stmt (si);
2135 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2137 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
2140 /* Skip stmts that are not vectorized inside the loop. */
2142 && !STMT_VINFO_RELEVANT_P (stmt_info)
2143 && (!STMT_VINFO_LIVE_P (stmt_info)
2144 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
2147 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))
2149 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
2150 stmt_cost = vect_get_cost (scalar_load);
2152 stmt_cost = vect_get_cost (scalar_store);
2155 stmt_cost = vect_get_cost (scalar_stmt);
2157 scalar_single_iter_cost += stmt_cost * factor;
2160 return scalar_single_iter_cost;
2163 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
2165 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
2166 int *peel_iters_epilogue,
2167 int scalar_single_iter_cost)
2169 int peel_guard_costs = 0;
2170 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2172 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2174 *peel_iters_epilogue = vf/2;
2175 if (vect_print_dump_info (REPORT_COST))
2176 fprintf (vect_dump, "cost model: "
2177 "epilogue peel iters set to vf/2 because "
2178 "loop iterations are unknown .");
2180 /* If peeled iterations are known but number of scalar loop
2181 iterations are unknown, count a taken branch per peeled loop. */
2182 peel_guard_costs = 2 * vect_get_cost (cond_branch_taken);
2186 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
2187 peel_iters_prologue = niters < peel_iters_prologue ?
2188 niters : peel_iters_prologue;
2189 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
2192 return (peel_iters_prologue * scalar_single_iter_cost)
2193 + (*peel_iters_epilogue * scalar_single_iter_cost)
2197 /* Function vect_estimate_min_profitable_iters
2199 Return the number of iterations required for the vector version of the
2200 loop to be profitable relative to the cost of the scalar version of the
2203 TODO: Take profile info into account before making vectorization
2204 decisions, if available. */
2207 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
2210 int min_profitable_iters;
2211 int peel_iters_prologue;
2212 int peel_iters_epilogue;
2213 int vec_inside_cost = 0;
2214 int vec_outside_cost = 0;
2215 int scalar_single_iter_cost = 0;
2216 int scalar_outside_cost = 0;
2217 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2218 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2219 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2220 int nbbs = loop->num_nodes;
2221 int npeel = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2222 int peel_guard_costs = 0;
2223 int innerloop_iters = 0, factor;
2224 VEC (slp_instance, heap) *slp_instances;
2225 slp_instance instance;
2227 /* Cost model disabled. */
2228 if (!flag_vect_cost_model)
2230 if (vect_print_dump_info (REPORT_COST))
2231 fprintf (vect_dump, "cost model disabled.");
2235 /* Requires loop versioning tests to handle misalignment. */
2236 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
2238 /* FIXME: Make cost depend on complexity of individual check. */
2240 VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
2241 if (vect_print_dump_info (REPORT_COST))
2242 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2243 "versioning to treat misalignment.\n");
2246 /* Requires loop versioning with alias checks. */
2247 if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2249 /* FIXME: Make cost depend on complexity of individual check. */
2251 VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
2252 if (vect_print_dump_info (REPORT_COST))
2253 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
2254 "versioning aliasing.\n");
2257 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2258 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2259 vec_outside_cost += vect_get_cost (cond_branch_taken);
2261 /* Count statements in scalar loop. Using this as scalar cost for a single
2264 TODO: Add outer loop support.
2266 TODO: Consider assigning different costs to different scalar
2271 innerloop_iters = 50; /* FIXME */
2273 for (i = 0; i < nbbs; i++)
2275 gimple_stmt_iterator si;
2276 basic_block bb = bbs[i];
2278 if (bb->loop_father == loop->inner)
2279 factor = innerloop_iters;
2283 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2285 gimple stmt = gsi_stmt (si);
2286 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2287 /* Skip stmts that are not vectorized inside the loop. */
2288 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2289 && (!STMT_VINFO_LIVE_P (stmt_info)
2290 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
2292 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
2293 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
2294 some of the "outside" costs are generated inside the outer-loop. */
2295 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
2299 scalar_single_iter_cost = vect_get_single_scalar_iteraion_cost (loop_vinfo);
2301 /* Add additional cost for the peeled instructions in prologue and epilogue
2304 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
2305 at compile-time - we assume it's vf/2 (the worst would be vf-1).
2307 TODO: Build an expression that represents peel_iters for prologue and
2308 epilogue to be used in a run-time test. */
2312 peel_iters_prologue = vf/2;
2313 if (vect_print_dump_info (REPORT_COST))
2314 fprintf (vect_dump, "cost model: "
2315 "prologue peel iters set to vf/2.");
2317 /* If peeling for alignment is unknown, loop bound of main loop becomes
2319 peel_iters_epilogue = vf/2;
2320 if (vect_print_dump_info (REPORT_COST))
2321 fprintf (vect_dump, "cost model: "
2322 "epilogue peel iters set to vf/2 because "
2323 "peeling for alignment is unknown .");
2325 /* If peeled iterations are unknown, count a taken branch and a not taken
2326 branch per peeled loop. Even if scalar loop iterations are known,
2327 vector iterations are not known since peeled prologue iterations are
2328 not known. Hence guards remain the same. */
2329 peel_guard_costs += 2 * (vect_get_cost (cond_branch_taken)
2330 + vect_get_cost (cond_branch_not_taken));
2331 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
2332 + (peel_iters_epilogue * scalar_single_iter_cost)
2337 peel_iters_prologue = npeel;
2338 vec_outside_cost += vect_get_known_peeling_cost (loop_vinfo,
2339 peel_iters_prologue, &peel_iters_epilogue,
2340 scalar_single_iter_cost);
2343 /* FORNOW: The scalar outside cost is incremented in one of the
2346 1. The vectorizer checks for alignment and aliasing and generates
2347 a condition that allows dynamic vectorization. A cost model
2348 check is ANDED with the versioning condition. Hence scalar code
2349 path now has the added cost of the versioning check.
2351 if (cost > th & versioning_check)
2354 Hence run-time scalar is incremented by not-taken branch cost.
2356 2. The vectorizer then checks if a prologue is required. If the
2357 cost model check was not done before during versioning, it has to
2358 be done before the prologue check.
2361 prologue = scalar_iters
2366 if (prologue == num_iters)
2369 Hence the run-time scalar cost is incremented by a taken branch,
2370 plus a not-taken branch, plus a taken branch cost.
2372 3. The vectorizer then checks if an epilogue is required. If the
2373 cost model check was not done before during prologue check, it
2374 has to be done with the epilogue check.
2380 if (prologue == num_iters)
2383 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
2386 Hence the run-time scalar cost should be incremented by 2 taken
2389 TODO: The back end may reorder the BBS's differently and reverse
2390 conditions/branch directions. Change the estimates below to
2391 something more reasonable. */
2393 /* If the number of iterations is known and we do not do versioning, we can
2394 decide whether to vectorize at compile time. Hence the scalar version
2395 do not carry cost model guard costs. */
2396 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2397 || LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2398 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2400 /* Cost model check occurs at versioning. */
2401 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
2402 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
2403 scalar_outside_cost += vect_get_cost (cond_branch_not_taken);
2406 /* Cost model check occurs at prologue generation. */
2407 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2408 scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken)
2409 + vect_get_cost (cond_branch_not_taken);
2410 /* Cost model check occurs at epilogue generation. */
2412 scalar_outside_cost += 2 * vect_get_cost (cond_branch_taken);
2416 /* Add SLP costs. */
2417 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2418 FOR_EACH_VEC_ELT (slp_instance, slp_instances, i, instance)
2420 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
2421 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
2424 /* Calculate number of iterations required to make the vector version
2425 profitable, relative to the loop bodies only. The following condition
2427 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
2429 SIC = scalar iteration cost, VIC = vector iteration cost,
2430 VOC = vector outside cost, VF = vectorization factor,
2431 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
2432 SOC = scalar outside cost for run time cost model check. */
2434 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
2436 if (vec_outside_cost <= 0)
2437 min_profitable_iters = 1;
2440 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
2441 - vec_inside_cost * peel_iters_prologue
2442 - vec_inside_cost * peel_iters_epilogue)
2443 / ((scalar_single_iter_cost * vf)
2446 if ((scalar_single_iter_cost * vf * min_profitable_iters)
2447 <= ((vec_inside_cost * min_profitable_iters)
2448 + ((vec_outside_cost - scalar_outside_cost) * vf)))
2449 min_profitable_iters++;
2452 /* vector version will never be profitable. */
2455 if (vect_print_dump_info (REPORT_COST))
2456 fprintf (vect_dump, "cost model: the vector iteration cost = %d "
2457 "divided by the scalar iteration cost = %d "
2458 "is greater or equal to the vectorization factor = %d.",
2459 vec_inside_cost, scalar_single_iter_cost, vf);
2463 if (vect_print_dump_info (REPORT_COST))
2465 fprintf (vect_dump, "Cost model analysis: \n");
2466 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
2468 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
2470 fprintf (vect_dump, " Scalar iteration cost: %d\n",
2471 scalar_single_iter_cost);
2472 fprintf (vect_dump, " Scalar outside cost: %d\n", scalar_outside_cost);
2473 fprintf (vect_dump, " prologue iterations: %d\n",
2474 peel_iters_prologue);
2475 fprintf (vect_dump, " epilogue iterations: %d\n",
2476 peel_iters_epilogue);
2477 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
2478 min_profitable_iters);
2481 min_profitable_iters =
2482 min_profitable_iters < vf ? vf : min_profitable_iters;
2484 /* Because the condition we create is:
2485 if (niters <= min_profitable_iters)
2486 then skip the vectorized loop. */
2487 min_profitable_iters--;
2489 if (vect_print_dump_info (REPORT_COST))
2490 fprintf (vect_dump, " Profitability threshold = %d\n",
2491 min_profitable_iters);
2493 return min_profitable_iters;
2497 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
2498 functions. Design better to avoid maintenance issues. */
2500 /* Function vect_model_reduction_cost.
2502 Models cost for a reduction operation, including the vector ops
2503 generated within the strip-mine loop, the initial definition before
2504 the loop, and the epilogue code that must be generated. */
2507 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
2511 enum tree_code code;
2514 gimple stmt, orig_stmt;
2516 enum machine_mode mode;
2517 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2518 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2521 /* Cost of reduction op inside loop. */
2522 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2523 += ncopies * vect_get_cost (vector_stmt);
2525 stmt = STMT_VINFO_STMT (stmt_info);
2527 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2529 case GIMPLE_SINGLE_RHS:
2530 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2531 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2533 case GIMPLE_UNARY_RHS:
2534 reduction_op = gimple_assign_rhs1 (stmt);
2536 case GIMPLE_BINARY_RHS:
2537 reduction_op = gimple_assign_rhs2 (stmt);
2539 case GIMPLE_TERNARY_RHS:
2540 reduction_op = gimple_assign_rhs3 (stmt);
2546 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2549 if (vect_print_dump_info (REPORT_COST))
2551 fprintf (vect_dump, "unsupported data-type ");
2552 print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
2557 mode = TYPE_MODE (vectype);
2558 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2561 orig_stmt = STMT_VINFO_STMT (stmt_info);
2563 code = gimple_assign_rhs_code (orig_stmt);
2565 /* Add in cost for initial definition. */
2566 outer_cost += vect_get_cost (scalar_to_vec);
2568 /* Determine cost of epilogue code.
2570 We have a reduction operator that will reduce the vector in one statement.
2571 Also requires scalar extract. */
2573 if (!nested_in_vect_loop_p (loop, orig_stmt))
2575 if (reduc_code != ERROR_MARK)
2576 outer_cost += vect_get_cost (vector_stmt)
2577 + vect_get_cost (vec_to_scalar);
2580 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2582 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
2583 int element_bitsize = tree_low_cst (bitsize, 1);
2584 int nelements = vec_size_in_bits / element_bitsize;
2586 optab = optab_for_tree_code (code, vectype, optab_default);
2588 /* We have a whole vector shift available. */
2589 if (VECTOR_MODE_P (mode)
2590 && optab_handler (optab, mode) != CODE_FOR_nothing
2591 && optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
2592 /* Final reduction via vector shifts and the reduction operator. Also
2593 requires scalar extract. */
2594 outer_cost += ((exact_log2(nelements) * 2)
2595 * vect_get_cost (vector_stmt)
2596 + vect_get_cost (vec_to_scalar));
2598 /* Use extracts and reduction op for final reduction. For N elements,
2599 we have N extracts and N-1 reduction ops. */
2600 outer_cost += ((nelements + nelements - 1)
2601 * vect_get_cost (vector_stmt));
2605 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
2607 if (vect_print_dump_info (REPORT_COST))
2608 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
2609 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2610 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2616 /* Function vect_model_induction_cost.
2618 Models cost for induction operations. */
2621 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
2623 /* loop cost for vec_loop. */
2624 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info)
2625 = ncopies * vect_get_cost (vector_stmt);
2626 /* prologue cost for vec_init and vec_step. */
2627 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info)
2628 = 2 * vect_get_cost (scalar_to_vec);
2630 if (vect_print_dump_info (REPORT_COST))
2631 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
2632 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
2633 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
2637 /* Function get_initial_def_for_induction
2640 STMT - a stmt that performs an induction operation in the loop.
2641 IV_PHI - the initial value of the induction variable
2644 Return a vector variable, initialized with the first VF values of
2645 the induction variable. E.g., for an iv with IV_PHI='X' and
2646 evolution S, for a vector of 4 units, we want to return:
2647 [X, X + S, X + 2*S, X + 3*S]. */
2650 get_initial_def_for_induction (gimple iv_phi)
2652 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
2653 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2654 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2658 edge pe = loop_preheader_edge (loop);
2659 struct loop *iv_loop;
2661 tree vec, vec_init, vec_step, t;
2665 gimple init_stmt, induction_phi, new_stmt;
2666 tree induc_def, vec_def, vec_dest;
2667 tree init_expr, step_expr;
2668 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2673 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
2674 bool nested_in_vect_loop = false;
2675 gimple_seq stmts = NULL;
2676 imm_use_iterator imm_iter;
2677 use_operand_p use_p;
2681 gimple_stmt_iterator si;
2682 basic_block bb = gimple_bb (iv_phi);
2686 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
2687 if (nested_in_vect_loop_p (loop, iv_phi))
2689 nested_in_vect_loop = true;
2690 iv_loop = loop->inner;
2694 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
2696 latch_e = loop_latch_edge (iv_loop);
2697 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
2699 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
2700 gcc_assert (access_fn);
2701 STRIP_NOPS (access_fn);
2702 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
2703 &init_expr, &step_expr);
2705 pe = loop_preheader_edge (iv_loop);
2707 scalar_type = TREE_TYPE (init_expr);
2708 vectype = get_vectype_for_scalar_type (scalar_type);
2709 resvectype = get_vectype_for_scalar_type (TREE_TYPE (PHI_RESULT (iv_phi)));
2710 gcc_assert (vectype);
2711 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2712 ncopies = vf / nunits;
2714 gcc_assert (phi_info);
2715 gcc_assert (ncopies >= 1);
2717 /* Find the first insertion point in the BB. */
2718 si = gsi_after_labels (bb);
2720 /* Create the vector that holds the initial_value of the induction. */
2721 if (nested_in_vect_loop)
2723 /* iv_loop is nested in the loop to be vectorized. init_expr had already
2724 been created during vectorization of previous stmts. We obtain it
2725 from the STMT_VINFO_VEC_STMT of the defining stmt. */
2726 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi,
2727 loop_preheader_edge (iv_loop));
2728 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
2732 /* iv_loop is the loop to be vectorized. Create:
2733 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
2734 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
2735 add_referenced_var (new_var);
2737 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
2740 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2741 gcc_assert (!new_bb);
2745 t = tree_cons (NULL_TREE, new_name, t);
2746 for (i = 1; i < nunits; i++)
2748 /* Create: new_name_i = new_name + step_expr */
2749 enum tree_code code = POINTER_TYPE_P (scalar_type)
2750 ? POINTER_PLUS_EXPR : PLUS_EXPR;
2751 init_stmt = gimple_build_assign_with_ops (code, new_var,
2752 new_name, step_expr);
2753 new_name = make_ssa_name (new_var, init_stmt);
2754 gimple_assign_set_lhs (init_stmt, new_name);
2756 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
2757 gcc_assert (!new_bb);
2759 if (vect_print_dump_info (REPORT_DETAILS))
2761 fprintf (vect_dump, "created new init_stmt: ");
2762 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
2764 t = tree_cons (NULL_TREE, new_name, t);
2766 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
2767 vec = build_constructor_from_list (vectype, nreverse (t));
2768 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
2772 /* Create the vector that holds the step of the induction. */
2773 if (nested_in_vect_loop)
2774 /* iv_loop is nested in the loop to be vectorized. Generate:
2775 vec_step = [S, S, S, S] */
2776 new_name = step_expr;
2779 /* iv_loop is the loop to be vectorized. Generate:
2780 vec_step = [VF*S, VF*S, VF*S, VF*S] */
2781 expr = build_int_cst (TREE_TYPE (step_expr), vf);
2782 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
2786 t = unshare_expr (new_name);
2787 gcc_assert (CONSTANT_CLASS_P (new_name));
2788 stepvectype = get_vectype_for_scalar_type (TREE_TYPE (new_name));
2789 gcc_assert (stepvectype);
2790 vec = build_vector_from_val (stepvectype, t);
2791 vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
2794 /* Create the following def-use cycle:
2799 vec_iv = PHI <vec_init, vec_loop>
2803 vec_loop = vec_iv + vec_step; */
2805 /* Create the induction-phi that defines the induction-operand. */
2806 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
2807 add_referenced_var (vec_dest);
2808 induction_phi = create_phi_node (vec_dest, iv_loop->header);
2809 set_vinfo_for_stmt (induction_phi,
2810 new_stmt_vec_info (induction_phi, loop_vinfo, NULL));
2811 induc_def = PHI_RESULT (induction_phi);
2813 /* Create the iv update inside the loop */
2814 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
2815 induc_def, vec_step);
2816 vec_def = make_ssa_name (vec_dest, new_stmt);
2817 gimple_assign_set_lhs (new_stmt, vec_def);
2818 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
2819 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo,
2822 /* Set the arguments of the phi node: */
2823 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
2824 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
2828 /* In case that vectorization factor (VF) is bigger than the number
2829 of elements that we can fit in a vectype (nunits), we have to generate
2830 more than one vector stmt - i.e - we need to "unroll" the
2831 vector stmt by a factor VF/nunits. For more details see documentation
2832 in vectorizable_operation. */
2836 stmt_vec_info prev_stmt_vinfo;
2837 /* FORNOW. This restriction should be relaxed. */
2838 gcc_assert (!nested_in_vect_loop);
2840 /* Create the vector that holds the step of the induction. */
2841 expr = build_int_cst (TREE_TYPE (step_expr), nunits);
2842 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
2844 t = unshare_expr (new_name);
2845 gcc_assert (CONSTANT_CLASS_P (new_name));
2846 vec = build_vector_from_val (stepvectype, t);
2847 vec_step = vect_init_vector (iv_phi, vec, stepvectype, NULL);
2849 vec_def = induc_def;
2850 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
2851 for (i = 1; i < ncopies; i++)
2853 /* vec_i = vec_prev + vec_step */
2854 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
2856 vec_def = make_ssa_name (vec_dest, new_stmt);
2857 gimple_assign_set_lhs (new_stmt, vec_def);
2859 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
2860 if (!useless_type_conversion_p (resvectype, vectype))
2862 new_stmt = gimple_build_assign_with_ops
2864 vect_get_new_vect_var (resvectype, vect_simple_var,
2866 build1 (VIEW_CONVERT_EXPR, resvectype,
2867 gimple_assign_lhs (new_stmt)), NULL_TREE);
2868 gimple_assign_set_lhs (new_stmt,
2870 (gimple_assign_lhs (new_stmt), new_stmt));
2871 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
2873 set_vinfo_for_stmt (new_stmt,
2874 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
2875 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
2876 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
2880 if (nested_in_vect_loop)
2882 /* Find the loop-closed exit-phi of the induction, and record
2883 the final vector of induction results: */
2885 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
2887 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
2889 exit_phi = USE_STMT (use_p);
2895 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
2896 /* FORNOW. Currently not supporting the case that an inner-loop induction
2897 is not used in the outer-loop (i.e. only outside the outer-loop). */
2898 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2899 && !STMT_VINFO_LIVE_P (stmt_vinfo));
2901 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
2902 if (vect_print_dump_info (REPORT_DETAILS))
2904 fprintf (vect_dump, "vector of inductions after inner-loop:");
2905 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
2911 if (vect_print_dump_info (REPORT_DETAILS))
2913 fprintf (vect_dump, "transform induction: created def-use cycle: ");
2914 print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
2915 fprintf (vect_dump, "\n");
2916 print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
2919 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
2920 if (!useless_type_conversion_p (resvectype, vectype))
2922 new_stmt = gimple_build_assign_with_ops
2924 vect_get_new_vect_var (resvectype, vect_simple_var, "vec_iv_"),
2925 build1 (VIEW_CONVERT_EXPR, resvectype, induc_def), NULL_TREE);
2926 induc_def = make_ssa_name (gimple_assign_lhs (new_stmt), new_stmt);
2927 gimple_assign_set_lhs (new_stmt, induc_def);
2928 si = gsi_start_bb (bb);
2929 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
2930 set_vinfo_for_stmt (new_stmt,
2931 new_stmt_vec_info (new_stmt, loop_vinfo, NULL));
2932 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_stmt))
2933 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (induction_phi));
2940 /* Function get_initial_def_for_reduction
2943 STMT - a stmt that performs a reduction operation in the loop.
2944 INIT_VAL - the initial value of the reduction variable
2947 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2948 of the reduction (used for adjusting the epilog - see below).
2949 Return a vector variable, initialized according to the operation that STMT
2950 performs. This vector will be used as the initial value of the
2951 vector of partial results.
2953 Option1 (adjust in epilog): Initialize the vector as follows:
2954 add/bit or/xor: [0,0,...,0,0]
2955 mult/bit and: [1,1,...,1,1]
2956 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
2957 and when necessary (e.g. add/mult case) let the caller know
2958 that it needs to adjust the result by init_val.
2960 Option2: Initialize the vector as follows:
2961 add/bit or/xor: [init_val,0,0,...,0]
2962 mult/bit and: [init_val,1,1,...,1]
2963 min/max/cond_expr: [init_val,init_val,...,init_val]
2964 and no adjustments are needed.
2966 For example, for the following code:
2972 STMT is 's = s + a[i]', and the reduction variable is 's'.
2973 For a vector of 4 units, we want to return either [0,0,0,init_val],
2974 or [0,0,0,0] and let the caller know that it needs to adjust
2975 the result at the end by 'init_val'.
2977 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2978 initialization vector is simpler (same element in all entries), if
2979 ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
2981 A cost model should help decide between these two schemes. */
2984 get_initial_def_for_reduction (gimple stmt, tree init_val,
2985 tree *adjustment_def)
2987 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2988 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2989 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2990 tree scalar_type = TREE_TYPE (init_val);
2991 tree vectype = get_vectype_for_scalar_type (scalar_type);
2993 enum tree_code code = gimple_assign_rhs_code (stmt);
2998 bool nested_in_vect_loop = false;
3000 REAL_VALUE_TYPE real_init_val = dconst0;
3001 int int_init_val = 0;
3002 gimple def_stmt = NULL;
3004 gcc_assert (vectype);
3005 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3007 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3008 || SCALAR_FLOAT_TYPE_P (scalar_type));
3010 if (nested_in_vect_loop_p (loop, stmt))
3011 nested_in_vect_loop = true;
3013 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
3015 /* In case of double reduction we only create a vector variable to be put
3016 in the reduction phi node. The actual statement creation is done in
3017 vect_create_epilog_for_reduction. */
3018 if (adjustment_def && nested_in_vect_loop
3019 && TREE_CODE (init_val) == SSA_NAME
3020 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3021 && gimple_code (def_stmt) == GIMPLE_PHI
3022 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
3023 && vinfo_for_stmt (def_stmt)
3024 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
3025 == vect_double_reduction_def)
3027 *adjustment_def = NULL;
3028 return vect_create_destination_var (init_val, vectype);
3031 if (TREE_CONSTANT (init_val))
3033 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3034 init_value = build_real (scalar_type, TREE_REAL_CST (init_val));
3036 init_value = build_int_cst (scalar_type, TREE_INT_CST_LOW (init_val));
3039 init_value = init_val;
3043 case WIDEN_SUM_EXPR:
3051 /* ADJUSMENT_DEF is NULL when called from
3052 vect_create_epilog_for_reduction to vectorize double reduction. */
3055 if (nested_in_vect_loop)
3056 *adjustment_def = vect_get_vec_def_for_operand (init_val, stmt,
3059 *adjustment_def = init_val;
3062 if (code == MULT_EXPR)
3064 real_init_val = dconst1;
3068 if (code == BIT_AND_EXPR)
3071 if (SCALAR_FLOAT_TYPE_P (scalar_type))
3072 def_for_init = build_real (scalar_type, real_init_val);
3074 def_for_init = build_int_cst (scalar_type, int_init_val);
3076 /* Create a vector of '0' or '1' except the first element. */
3077 for (i = nunits - 2; i >= 0; --i)
3078 t = tree_cons (NULL_TREE, def_for_init, t);
3080 /* Option1: the first element is '0' or '1' as well. */
3083 t = tree_cons (NULL_TREE, def_for_init, t);
3084 init_def = build_vector (vectype, t);
3088 /* Option2: the first element is INIT_VAL. */
3089 t = tree_cons (NULL_TREE, init_value, t);
3090 if (TREE_CONSTANT (init_val))
3091 init_def = build_vector (vectype, t);
3093 init_def = build_constructor_from_list (vectype, t);
3102 *adjustment_def = NULL_TREE;
3103 init_def = vect_get_vec_def_for_operand (init_val, stmt, NULL);
3107 init_def = build_vector_from_val (vectype, init_value);
3118 /* Function vect_create_epilog_for_reduction
3120 Create code at the loop-epilog to finalize the result of a reduction
3123 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
3124 reduction statements.
3125 STMT is the scalar reduction stmt that is being vectorized.
3126 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
3127 number of elements that we can fit in a vectype (nunits). In this case
3128 we have to generate more than one vector stmt - i.e - we need to "unroll"
3129 the vector stmt by a factor VF/nunits. For more details see documentation
3130 in vectorizable_operation.
3131 REDUC_CODE is the tree-code for the epilog reduction.
3132 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
3134 REDUC_INDEX is the index of the operand in the right hand side of the
3135 statement that is defined by REDUCTION_PHI.
3136 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
3137 SLP_NODE is an SLP node containing a group of reduction statements. The
3138 first one in this group is STMT.
3141 1. Creates the reduction def-use cycles: sets the arguments for
3143 The loop-entry argument is the vectorized initial-value of the reduction.
3144 The loop-latch argument is taken from VECT_DEFS - the vector of partial
3146 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
3147 by applying the operation specified by REDUC_CODE if available, or by
3148 other means (whole-vector shifts or a scalar loop).
3149 The function also creates a new phi node at the loop exit to preserve
3150 loop-closed form, as illustrated below.
3152 The flow at the entry to this function:
3155 vec_def = phi <null, null> # REDUCTION_PHI
3156 VECT_DEF = vector_stmt # vectorized form of STMT
3157 s_loop = scalar_stmt # (scalar) STMT
3159 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3163 The above is transformed by this function into:
3166 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3167 VECT_DEF = vector_stmt # vectorized form of STMT
3168 s_loop = scalar_stmt # (scalar) STMT
3170 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3171 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3172 v_out2 = reduce <v_out1>
3173 s_out3 = extract_field <v_out2, 0>
3174 s_out4 = adjust_result <s_out3>
3180 vect_create_epilog_for_reduction (VEC (tree, heap) *vect_defs, gimple stmt,
3181 int ncopies, enum tree_code reduc_code,
3182 VEC (gimple, heap) *reduction_phis,
3183 int reduc_index, bool double_reduc,
3186 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3187 stmt_vec_info prev_phi_info;
3189 enum machine_mode mode;
3190 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3191 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
3192 basic_block exit_bb;
3195 gimple new_phi = NULL, phi;
3196 gimple_stmt_iterator exit_gsi;
3198 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
3199 gimple epilog_stmt = NULL;
3200 enum tree_code code = gimple_assign_rhs_code (stmt);
3202 tree bitsize, bitpos;
3203 tree adjustment_def = NULL;
3204 tree vec_initial_def = NULL;
3205 tree reduction_op, expr, def;
3206 tree orig_name, scalar_result;
3207 imm_use_iterator imm_iter, phi_imm_iter;
3208 use_operand_p use_p, phi_use_p;
3209 bool extract_scalar_result = false;
3210 gimple use_stmt, orig_stmt, reduction_phi = NULL;
3211 bool nested_in_vect_loop = false;
3212 VEC (gimple, heap) *new_phis = NULL;
3213 enum vect_def_type dt = vect_unknown_def_type;
3215 VEC (tree, heap) *scalar_results = NULL;
3216 unsigned int group_size = 1, k, ratio;
3217 VEC (tree, heap) *vec_initial_defs = NULL;
3218 VEC (gimple, heap) *phis;
3221 group_size = VEC_length (gimple, SLP_TREE_SCALAR_STMTS (slp_node));
3223 if (nested_in_vect_loop_p (loop, stmt))
3227 nested_in_vect_loop = true;
3228 gcc_assert (!slp_node);
3231 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
3233 case GIMPLE_SINGLE_RHS:
3234 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt))
3236 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), reduc_index);
3238 case GIMPLE_UNARY_RHS:
3239 reduction_op = gimple_assign_rhs1 (stmt);
3241 case GIMPLE_BINARY_RHS:
3242 reduction_op = reduc_index ?
3243 gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
3245 case GIMPLE_TERNARY_RHS:
3246 reduction_op = gimple_op (stmt, reduc_index + 1);
3252 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
3253 gcc_assert (vectype);
3254 mode = TYPE_MODE (vectype);
3256 /* 1. Create the reduction def-use cycle:
3257 Set the arguments of REDUCTION_PHIS, i.e., transform
3260 vec_def = phi <null, null> # REDUCTION_PHI
3261 VECT_DEF = vector_stmt # vectorized form of STMT
3267 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
3268 VECT_DEF = vector_stmt # vectorized form of STMT
3271 (in case of SLP, do it for all the phis). */
3273 /* Get the loop-entry arguments. */
3275 vect_get_slp_defs (reduction_op, NULL_TREE, slp_node, &vec_initial_defs,
3279 vec_initial_defs = VEC_alloc (tree, heap, 1);
3280 /* For the case of reduction, vect_get_vec_def_for_operand returns
3281 the scalar def before the loop, that defines the initial value
3282 of the reduction variable. */
3283 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
3285 VEC_quick_push (tree, vec_initial_defs, vec_initial_def);
3288 /* Set phi nodes arguments. */
3289 FOR_EACH_VEC_ELT (gimple, reduction_phis, i, phi)
3291 tree vec_init_def = VEC_index (tree, vec_initial_defs, i);
3292 tree def = VEC_index (tree, vect_defs, i);
3293 for (j = 0; j < ncopies; j++)
3295 /* Set the loop-entry arg of the reduction-phi. */
3296 add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
3299 /* Set the loop-latch arg for the reduction-phi. */
3301 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def);
3303 add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
3305 if (vect_print_dump_info (REPORT_DETAILS))
3307 fprintf (vect_dump, "transform reduction: created def-use"
3309 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
3310 fprintf (vect_dump, "\n");
3311 print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0,
3315 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
3319 VEC_free (tree, heap, vec_initial_defs);
3321 /* 2. Create epilog code.
3322 The reduction epilog code operates across the elements of the vector
3323 of partial results computed by the vectorized loop.
3324 The reduction epilog code consists of:
3326 step 1: compute the scalar result in a vector (v_out2)
3327 step 2: extract the scalar result (s_out3) from the vector (v_out2)
3328 step 3: adjust the scalar result (s_out3) if needed.
3330 Step 1 can be accomplished using one the following three schemes:
3331 (scheme 1) using reduc_code, if available.
3332 (scheme 2) using whole-vector shifts, if available.
3333 (scheme 3) using a scalar loop. In this case steps 1+2 above are
3336 The overall epilog code looks like this:
3338 s_out0 = phi <s_loop> # original EXIT_PHI
3339 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3340 v_out2 = reduce <v_out1> # step 1
3341 s_out3 = extract_field <v_out2, 0> # step 2
3342 s_out4 = adjust_result <s_out3> # step 3
3344 (step 3 is optional, and steps 1 and 2 may be combined).
3345 Lastly, the uses of s_out0 are replaced by s_out4. */
3348 /* 2.1 Create new loop-exit-phis to preserve loop-closed form:
3349 v_out1 = phi <VECT_DEF>
3350 Store them in NEW_PHIS. */
3352 exit_bb = single_exit (loop)->dest;
3353 prev_phi_info = NULL;
3354 new_phis = VEC_alloc (gimple, heap, VEC_length (tree, vect_defs));
3355 FOR_EACH_VEC_ELT (tree, vect_defs, i, def)
3357 for (j = 0; j < ncopies; j++)
3359 phi = create_phi_node (SSA_NAME_VAR (def), exit_bb);
3360 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo, NULL));
3362 VEC_quick_push (gimple, new_phis, phi);
3365 def = vect_get_vec_def_for_stmt_copy (dt, def);
3366 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
3369 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
3370 prev_phi_info = vinfo_for_stmt (phi);
3374 /* The epilogue is created for the outer-loop, i.e., for the loop being
3379 exit_bb = single_exit (loop)->dest;
3382 exit_gsi = gsi_after_labels (exit_bb);
3384 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
3385 (i.e. when reduc_code is not available) and in the final adjustment
3386 code (if needed). Also get the original scalar reduction variable as
3387 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
3388 represents a reduction pattern), the tree-code and scalar-def are
3389 taken from the original stmt that the pattern-stmt (STMT) replaces.
3390 Otherwise (it is a regular reduction) - the tree-code and scalar-def
3391 are taken from STMT. */
3393 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3396 /* Regular reduction */
3401 /* Reduction pattern */
3402 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
3403 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
3404 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3407 code = gimple_assign_rhs_code (orig_stmt);
3408 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
3409 partial results are added and not subtracted. */
3410 if (code == MINUS_EXPR)
3413 scalar_dest = gimple_assign_lhs (orig_stmt);
3414 scalar_type = TREE_TYPE (scalar_dest);
3415 scalar_results = VEC_alloc (tree, heap, group_size);
3416 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
3417 bitsize = TYPE_SIZE (scalar_type);
3419 /* In case this is a reduction in an inner-loop while vectorizing an outer
3420 loop - we don't need to extract a single scalar result at the end of the
3421 inner-loop (unless it is double reduction, i.e., the use of reduction is
3422 outside the outer-loop). The final vector of partial results will be used
3423 in the vectorized outer-loop, or reduced to a scalar result at the end of
3425 if (nested_in_vect_loop && !double_reduc)
3426 goto vect_finalize_reduction;
3428 /* 2.3 Create the reduction code, using one of the three schemes described
3429 above. In SLP we simply need to extract all the elements from the
3430 vector (without reducing them), so we use scalar shifts. */
3431 if (reduc_code != ERROR_MARK && !slp_node)
3435 /*** Case 1: Create:
3436 v_out2 = reduc_expr <v_out1> */
3438 if (vect_print_dump_info (REPORT_DETAILS))
3439 fprintf (vect_dump, "Reduce using direct vector reduction.");
3441 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3442 new_phi = VEC_index (gimple, new_phis, 0);
3443 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
3444 epilog_stmt = gimple_build_assign (vec_dest, tmp);
3445 new_temp = make_ssa_name (vec_dest, epilog_stmt);
3446 gimple_assign_set_lhs (epilog_stmt, new_temp);
3447 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3449 extract_scalar_result = true;
3453 enum tree_code shift_code = ERROR_MARK;
3454 bool have_whole_vector_shift = true;
3456 int element_bitsize = tree_low_cst (bitsize, 1);
3457 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3460 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3461 shift_code = VEC_RSHIFT_EXPR;
3463 have_whole_vector_shift = false;
3465 /* Regardless of whether we have a whole vector shift, if we're
3466 emulating the operation via tree-vect-generic, we don't want
3467 to use it. Only the first round of the reduction is likely
3468 to still be profitable via emulation. */
3469 /* ??? It might be better to emit a reduction tree code here, so that
3470 tree-vect-generic can expand the first round via bit tricks. */
3471 if (!VECTOR_MODE_P (mode))
3472 have_whole_vector_shift = false;
3475 optab optab = optab_for_tree_code (code, vectype, optab_default);
3476 if (optab_handler (optab, mode) == CODE_FOR_nothing)
3477 have_whole_vector_shift = false;
3480 if (have_whole_vector_shift && !slp_node)
3482 /*** Case 2: Create:
3483 for (offset = VS/2; offset >= element_size; offset/=2)
3485 Create: va' = vec_shift <va, offset>
3486 Create: va = vop <va, va'>
3489 if (vect_print_dump_info (REPORT_DETAILS))
3490 fprintf (vect_dump, "Reduce using vector shifts");
3492 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3493 new_phi = VEC_index (gimple, new_phis, 0);
3494 new_temp = PHI_RESULT (new_phi);
3495 for (bit_offset = vec_size_in_bits/2;
3496 bit_offset >= element_bitsize;
3499 tree bitpos = size_int (bit_offset);
3501 epilog_stmt = gimple_build_assign_with_ops (shift_code,
3502 vec_dest, new_temp, bitpos);
3503 new_name = make_ssa_name (vec_dest, epilog_stmt);
3504 gimple_assign_set_lhs (epilog_stmt, new_name);
3505 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3507 epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
3508 new_name, new_temp);
3509 new_temp = make_ssa_name (vec_dest, epilog_stmt);
3510 gimple_assign_set_lhs (epilog_stmt, new_temp);
3511 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3514 extract_scalar_result = true;
3520 /*** Case 3: Create:
3521 s = extract_field <v_out2, 0>
3522 for (offset = element_size;
3523 offset < vector_size;
3524 offset += element_size;)
3526 Create: s' = extract_field <v_out2, offset>
3527 Create: s = op <s, s'> // For non SLP cases
3530 if (vect_print_dump_info (REPORT_DETAILS))
3531 fprintf (vect_dump, "Reduce using scalar code. ");
3533 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
3534 FOR_EACH_VEC_ELT (gimple, new_phis, i, new_phi)
3536 vec_temp = PHI_RESULT (new_phi);
3537 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
3539 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3540 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3541 gimple_assign_set_lhs (epilog_stmt, new_temp);
3542 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3544 /* In SLP we don't need to apply reduction operation, so we just
3545 collect s' values in SCALAR_RESULTS. */
3547 VEC_safe_push (tree, heap, scalar_results, new_temp);
3549 for (bit_offset = element_bitsize;
3550 bit_offset < vec_size_in_bits;
3551 bit_offset += element_bitsize)
3553 tree bitpos = bitsize_int (bit_offset);
3554 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp,
3557 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3558 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
3559 gimple_assign_set_lhs (epilog_stmt, new_name);
3560 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3564 /* In SLP we don't need to apply reduction operation, so
3565 we just collect s' values in SCALAR_RESULTS. */
3566 new_temp = new_name;
3567 VEC_safe_push (tree, heap, scalar_results, new_name);
3571 epilog_stmt = gimple_build_assign_with_ops (code,
3572 new_scalar_dest, new_name, new_temp);
3573 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3574 gimple_assign_set_lhs (epilog_stmt, new_temp);
3575 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3580 /* The only case where we need to reduce scalar results in SLP, is
3581 unrolling. If the size of SCALAR_RESULTS is greater than
3582 GROUP_SIZE, we reduce them combining elements modulo
3586 tree res, first_res, new_res;
3589 /* Reduce multiple scalar results in case of SLP unrolling. */
3590 for (j = group_size; VEC_iterate (tree, scalar_results, j, res);
3593 first_res = VEC_index (tree, scalar_results, j % group_size);
3594 new_stmt = gimple_build_assign_with_ops (code,
3595 new_scalar_dest, first_res, res);
3596 new_res = make_ssa_name (new_scalar_dest, new_stmt);
3597 gimple_assign_set_lhs (new_stmt, new_res);
3598 gsi_insert_before (&exit_gsi, new_stmt, GSI_SAME_STMT);
3599 VEC_replace (tree, scalar_results, j % group_size, new_res);
3603 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
3604 VEC_safe_push (tree, heap, scalar_results, new_temp);
3606 extract_scalar_result = false;
3610 /* 2.4 Extract the final scalar result. Create:
3611 s_out3 = extract_field <v_out2, bitpos> */
3613 if (extract_scalar_result)
3617 if (vect_print_dump_info (REPORT_DETAILS))
3618 fprintf (vect_dump, "extract scalar result");
3620 if (BYTES_BIG_ENDIAN)
3621 bitpos = size_binop (MULT_EXPR,
3622 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
3623 TYPE_SIZE (scalar_type));
3625 bitpos = bitsize_zero_node;
3627 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
3628 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
3629 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
3630 gimple_assign_set_lhs (epilog_stmt, new_temp);
3631 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3632 VEC_safe_push (tree, heap, scalar_results, new_temp);
3635 vect_finalize_reduction:
3640 /* 2.5 Adjust the final result by the initial value of the reduction
3641 variable. (When such adjustment is not needed, then
3642 'adjustment_def' is zero). For example, if code is PLUS we create:
3643 new_temp = loop_exit_def + adjustment_def */
3647 gcc_assert (!slp_node);
3648 if (nested_in_vect_loop)
3650 new_phi = VEC_index (gimple, new_phis, 0);
3651 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
3652 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
3653 new_dest = vect_create_destination_var (scalar_dest, vectype);
3657 new_temp = VEC_index (tree, scalar_results, 0);
3658 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
3659 expr = build2 (code, scalar_type, new_temp, adjustment_def);
3660 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
3663 epilog_stmt = gimple_build_assign (new_dest, expr);
3664 new_temp = make_ssa_name (new_dest, epilog_stmt);
3665 gimple_assign_set_lhs (epilog_stmt, new_temp);
3666 SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
3667 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
3668 if (nested_in_vect_loop)
3670 set_vinfo_for_stmt (epilog_stmt,
3671 new_stmt_vec_info (epilog_stmt, loop_vinfo,
3673 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
3674 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
3677 VEC_quick_push (tree, scalar_results, new_temp);
3679 VEC_replace (tree, scalar_results, 0, new_temp);
3682 VEC_replace (tree, scalar_results, 0, new_temp);
3684 VEC_replace (gimple, new_phis, 0, epilog_stmt);
3687 /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit
3688 phis with new adjusted scalar results, i.e., replace use <s_out0>
3693 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3694 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3695 v_out2 = reduce <v_out1>
3696 s_out3 = extract_field <v_out2, 0>
3697 s_out4 = adjust_result <s_out3>
3704 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
3705 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
3706 v_out2 = reduce <v_out1>
3707 s_out3 = extract_field <v_out2, 0>
3708 s_out4 = adjust_result <s_out3>
3712 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
3713 case that GROUP_SIZE is greater than vectorization factor). Therefore, we
3714 need to match SCALAR_RESULTS with corresponding statements. The first
3715 (GROUP_SIZE / number of new vector stmts) scalar results correspond to
3716 the first vector stmt, etc.
3717 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */
3718 if (group_size > VEC_length (gimple, new_phis))
3720 ratio = group_size / VEC_length (gimple, new_phis);
3721 gcc_assert (!(group_size % VEC_length (gimple, new_phis)));
3726 for (k = 0; k < group_size; k++)
3730 epilog_stmt = VEC_index (gimple, new_phis, k / ratio);
3731 reduction_phi = VEC_index (gimple, reduction_phis, k / ratio);
3736 gimple current_stmt = VEC_index (gimple,
3737 SLP_TREE_SCALAR_STMTS (slp_node), k);
3739 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt));
3740 /* SLP statements can't participate in patterns. */
3741 gcc_assert (!orig_stmt);
3742 scalar_dest = gimple_assign_lhs (current_stmt);
3745 phis = VEC_alloc (gimple, heap, 3);
3746 /* Find the loop-closed-use at the loop exit of the original scalar
3747 result. (The reduction result is expected to have two immediate uses -
3748 one at the latch block, and one at the loop exit). */
3749 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
3750 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
3751 VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
3753 /* We expect to have found an exit_phi because of loop-closed-ssa
3755 gcc_assert (!VEC_empty (gimple, phis));
3757 FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
3761 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi);
3764 /* FORNOW. Currently not supporting the case that an inner-loop
3765 reduction is not used in the outer-loop (but only outside the
3766 outer-loop), unless it is double reduction. */
3767 gcc_assert ((STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
3768 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))
3771 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt;
3773 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
3774 != vect_double_reduction_def)
3777 /* Handle double reduction:
3779 stmt1: s1 = phi <s0, s2> - double reduction phi (outer loop)
3780 stmt2: s3 = phi <s1, s4> - (regular) reduc phi (inner loop)
3781 stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop)
3782 stmt4: s2 = phi <s4> - double reduction stmt (outer loop)
3784 At that point the regular reduction (stmt2 and stmt3) is
3785 already vectorized, as well as the exit phi node, stmt4.
3786 Here we vectorize the phi node of double reduction, stmt1, and
3787 update all relevant statements. */
3789 /* Go through all the uses of s2 to find double reduction phi
3790 node, i.e., stmt1 above. */
3791 orig_name = PHI_RESULT (exit_phi);
3792 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
3794 stmt_vec_info use_stmt_vinfo = vinfo_for_stmt (use_stmt);
3795 stmt_vec_info new_phi_vinfo;
3796 tree vect_phi_init, preheader_arg, vect_phi_res, init_def;
3797 basic_block bb = gimple_bb (use_stmt);
3800 /* Check that USE_STMT is really double reduction phi
3802 if (gimple_code (use_stmt) != GIMPLE_PHI
3803 || gimple_phi_num_args (use_stmt) != 2
3805 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
3806 != vect_double_reduction_def
3807 || bb->loop_father != outer_loop)
3810 /* Create vector phi node for double reduction:
3811 vs1 = phi <vs0, vs2>
3812 vs1 was created previously in this function by a call to
3813 vect_get_vec_def_for_operand and is stored in
3815 vs2 is defined by EPILOG_STMT, the vectorized EXIT_PHI;
3816 vs0 is created here. */
3818 /* Create vector phi node. */
3819 vect_phi = create_phi_node (vec_initial_def, bb);
3820 new_phi_vinfo = new_stmt_vec_info (vect_phi,
3821 loop_vec_info_for_loop (outer_loop), NULL);
3822 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
3824 /* Create vs0 - initial def of the double reduction phi. */
3825 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
3826 loop_preheader_edge (outer_loop));
3827 init_def = get_initial_def_for_reduction (stmt,
3828 preheader_arg, NULL);
3829 vect_phi_init = vect_init_vector (use_stmt, init_def,
3832 /* Update phi node arguments with vs0 and vs2. */
3833 add_phi_arg (vect_phi, vect_phi_init,
3834 loop_preheader_edge (outer_loop),
3836 add_phi_arg (vect_phi, PHI_RESULT (epilog_stmt),
3837 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
3838 if (vect_print_dump_info (REPORT_DETAILS))
3840 fprintf (vect_dump, "created double reduction phi "
3842 print_gimple_stmt (vect_dump, vect_phi, 0, TDF_SLIM);
3845 vect_phi_res = PHI_RESULT (vect_phi);
3847 /* Replace the use, i.e., set the correct vs1 in the regular
3848 reduction phi node. FORNOW, NCOPIES is always 1, so the
3849 loop is redundant. */
3850 use = reduction_phi;
3851 for (j = 0; j < ncopies; j++)
3853 edge pr_edge = loop_preheader_edge (loop);
3854 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res);
3855 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use));
3861 VEC_free (gimple, heap, phis);
3862 if (nested_in_vect_loop)
3870 phis = VEC_alloc (gimple, heap, 3);
3871 /* Find the loop-closed-use at the loop exit of the original scalar
3872 result. (The reduction result is expected to have two immediate uses,
3873 one at the latch block, and one at the loop exit). For double
3874 reductions we are looking for exit phis of the outer loop. */
3875 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
3877 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
3878 VEC_safe_push (gimple, heap, phis, USE_STMT (use_p));
3881 if (double_reduc && gimple_code (USE_STMT (use_p)) == GIMPLE_PHI)
3883 tree phi_res = PHI_RESULT (USE_STMT (use_p));
3885 FOR_EACH_IMM_USE_FAST (phi_use_p, phi_imm_iter, phi_res)
3887 if (!flow_bb_inside_loop_p (loop,
3888 gimple_bb (USE_STMT (phi_use_p))))
3889 VEC_safe_push (gimple, heap, phis,
3890 USE_STMT (phi_use_p));
3896 FOR_EACH_VEC_ELT (gimple, phis, i, exit_phi)
3898 /* Replace the uses: */
3899 orig_name = PHI_RESULT (exit_phi);
3900 scalar_result = VEC_index (tree, scalar_results, k);
3901 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
3902 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
3903 SET_USE (use_p, scalar_result);
3906 VEC_free (gimple, heap, phis);
3909 VEC_free (tree, heap, scalar_results);
3910 VEC_free (gimple, heap, new_phis);
3914 /* Function vectorizable_reduction.
3916 Check if STMT performs a reduction operation that can be vectorized.
3917 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3918 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
3919 Return FALSE if not a vectorizable STMT, TRUE otherwise.
3921 This function also handles reduction idioms (patterns) that have been
3922 recognized in advance during vect_pattern_recog. In this case, STMT may be
3924 X = pattern_expr (arg0, arg1, ..., X)
3925 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
3926 sequence that had been detected and replaced by the pattern-stmt (STMT).
3928 In some cases of reduction patterns, the type of the reduction variable X is
3929 different than the type of the other arguments of STMT.
3930 In such cases, the vectype that is used when transforming STMT into a vector
3931 stmt is different than the vectype that is used to determine the
3932 vectorization factor, because it consists of a different number of elements
3933 than the actual number of elements that are being operated upon in parallel.
3935 For example, consider an accumulation of shorts into an int accumulator.
3936 On some targets it's possible to vectorize this pattern operating on 8
3937 shorts at a time (hence, the vectype for purposes of determining the
3938 vectorization factor should be V8HI); on the other hand, the vectype that
3939 is used to create the vector form is actually V4SI (the type of the result).
3941 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
3942 indicates what is the actual level of parallelism (V8HI in the example), so
3943 that the right vectorization factor would be derived. This vectype
3944 corresponds to the type of arguments to the reduction stmt, and should *NOT*
3945 be used to create the vectorized stmt. The right vectype for the vectorized
3946 stmt is obtained from the type of the result X:
3947 get_vectype_for_scalar_type (TREE_TYPE (X))
3949 This means that, contrary to "regular" reductions (or "regular" stmts in
3950 general), the following equation:
3951 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
3952 does *NOT* necessarily hold for reduction patterns. */
3955 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
3956 gimple *vec_stmt, slp_tree slp_node)
3960 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
3961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3962 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
3963 tree vectype_in = NULL_TREE;
3964 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3965 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3966 enum tree_code code, orig_code, epilog_reduc_code;
3967 enum machine_mode vec_mode;
3969 optab optab, reduc_optab;
3970 tree new_temp = NULL_TREE;
3973 enum vect_def_type dt;
3974 gimple new_phi = NULL;
3978 stmt_vec_info orig_stmt_info;
3979 tree expr = NULL_TREE;
3983 stmt_vec_info prev_stmt_info, prev_phi_info;
3984 bool single_defuse_cycle = false;
3985 tree reduc_def = NULL_TREE;
3986 gimple new_stmt = NULL;
3989 bool nested_cycle = false, found_nested_cycle_def = false;
3990 gimple reduc_def_stmt = NULL;
3991 /* The default is that the reduction variable is the last in statement. */
3992 int reduc_index = 2;
3993 bool double_reduc = false, dummy;
3995 struct loop * def_stmt_loop, *outer_loop = NULL;
3997 gimple def_arg_stmt;
3998 VEC (tree, heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL, *vect_defs = NULL;
3999 VEC (gimple, heap) *phis = NULL;
4001 tree def0, def1, tem;
4003 if (nested_in_vect_loop_p (loop, stmt))
4007 nested_cycle = true;
4010 /* 1. Is vectorizable reduction? */
4011 /* Not supportable if the reduction variable is used in the loop. */
4012 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
4015 /* Reductions that are not used even in an enclosing outer-loop,
4016 are expected to be "live" (used out of the loop). */
4017 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
4018 && !STMT_VINFO_LIVE_P (stmt_info))
4021 /* Make sure it was already recognized as a reduction computation. */
4022 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
4023 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
4026 /* 2. Has this been recognized as a reduction pattern?
4028 Check if STMT represents a pattern that has been recognized
4029 in earlier analysis stages. For stmts that represent a pattern,
4030 the STMT_VINFO_RELATED_STMT field records the last stmt in
4031 the original sequence that constitutes the pattern. */
4033 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
4036 orig_stmt_info = vinfo_for_stmt (orig_stmt);
4037 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
4038 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4039 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
4042 /* 3. Check the operands of the operation. The first operands are defined
4043 inside the loop body. The last operand is the reduction variable,
4044 which is defined by the loop-header-phi. */
4046 gcc_assert (is_gimple_assign (stmt));
4049 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
4051 case GIMPLE_SINGLE_RHS:
4052 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
4053 if (op_type == ternary_op)
4055 tree rhs = gimple_assign_rhs1 (stmt);
4056 ops[0] = TREE_OPERAND (rhs, 0);
4057 ops[1] = TREE_OPERAND (rhs, 1);
4058 ops[2] = TREE_OPERAND (rhs, 2);
4059 code = TREE_CODE (rhs);
4065 case GIMPLE_BINARY_RHS:
4066 code = gimple_assign_rhs_code (stmt);
4067 op_type = TREE_CODE_LENGTH (code);
4068 gcc_assert (op_type == binary_op);
4069 ops[0] = gimple_assign_rhs1 (stmt);
4070 ops[1] = gimple_assign_rhs2 (stmt);
4073 case GIMPLE_TERNARY_RHS:
4074 code = gimple_assign_rhs_code (stmt);
4075 op_type = TREE_CODE_LENGTH (code);
4076 gcc_assert (op_type == ternary_op);
4077 ops[0] = gimple_assign_rhs1 (stmt);
4078 ops[1] = gimple_assign_rhs2 (stmt);
4079 ops[2] = gimple_assign_rhs3 (stmt);
4082 case GIMPLE_UNARY_RHS:
4089 scalar_dest = gimple_assign_lhs (stmt);
4090 scalar_type = TREE_TYPE (scalar_dest);
4091 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
4092 && !SCALAR_FLOAT_TYPE_P (scalar_type))
4095 /* All uses but the last are expected to be defined in the loop.
4096 The last use is the reduction variable. In case of nested cycle this
4097 assumption is not true: we use reduc_index to record the index of the
4098 reduction variable. */
4099 for (i = 0; i < op_type-1; i++)
4101 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
4102 if (i == 0 && code == COND_EXPR)
4105 is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL,
4106 &def_stmt, &def, &dt, &tem);
4109 gcc_assert (is_simple_use);
4110 if (dt != vect_internal_def
4111 && dt != vect_external_def
4112 && dt != vect_constant_def
4113 && dt != vect_induction_def
4114 && !(dt == vect_nested_cycle && nested_cycle))
4117 if (dt == vect_nested_cycle)
4119 found_nested_cycle_def = true;
4120 reduc_def_stmt = def_stmt;
4125 is_simple_use = vect_is_simple_use_1 (ops[i], loop_vinfo, NULL, &def_stmt,
4129 gcc_assert (is_simple_use);
4130 gcc_assert (dt == vect_reduction_def
4131 || dt == vect_nested_cycle
4132 || ((dt == vect_internal_def || dt == vect_external_def
4133 || dt == vect_constant_def || dt == vect_induction_def)
4134 && nested_cycle && found_nested_cycle_def));
4135 if (!found_nested_cycle_def)
4136 reduc_def_stmt = def_stmt;
4138 gcc_assert (gimple_code (reduc_def_stmt) == GIMPLE_PHI);
4140 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo,
4145 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, reduc_def_stmt,
4146 !nested_cycle, &dummy));
4148 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt)))
4151 if (slp_node || PURE_SLP_STMT (stmt_info))
4154 ncopies = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4155 / TYPE_VECTOR_SUBPARTS (vectype_in));
4157 gcc_assert (ncopies >= 1);
4159 vec_mode = TYPE_MODE (vectype_in);
4161 if (code == COND_EXPR)
4163 if (!vectorizable_condition (stmt, gsi, NULL, ops[reduc_index], 0))
4165 if (vect_print_dump_info (REPORT_DETAILS))
4166 fprintf (vect_dump, "unsupported condition in reduction");
4173 /* 4. Supportable by target? */
4175 /* 4.1. check support for the operation in the loop */
4176 optab = optab_for_tree_code (code, vectype_in, optab_default);
4179 if (vect_print_dump_info (REPORT_DETAILS))
4180 fprintf (vect_dump, "no optab.");
4185 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
4187 if (vect_print_dump_info (REPORT_DETAILS))
4188 fprintf (vect_dump, "op not supported by target.");
4190 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
4191 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4192 < vect_min_worthwhile_factor (code))
4195 if (vect_print_dump_info (REPORT_DETAILS))
4196 fprintf (vect_dump, "proceeding using word mode.");
4199 /* Worthwhile without SIMD support? */
4200 if (!VECTOR_MODE_P (TYPE_MODE (vectype_in))
4201 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
4202 < vect_min_worthwhile_factor (code))
4204 if (vect_print_dump_info (REPORT_DETAILS))
4205 fprintf (vect_dump, "not worthwhile without SIMD support.");
4211 /* 4.2. Check support for the epilog operation.
4213 If STMT represents a reduction pattern, then the type of the
4214 reduction variable may be different than the type of the rest
4215 of the arguments. For example, consider the case of accumulation
4216 of shorts into an int accumulator; The original code:
4217 S1: int_a = (int) short_a;
4218 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
4221 STMT: int_acc = widen_sum <short_a, int_acc>
4224 1. The tree-code that is used to create the vector operation in the
4225 epilog code (that reduces the partial results) is not the
4226 tree-code of STMT, but is rather the tree-code of the original
4227 stmt from the pattern that STMT is replacing. I.e, in the example
4228 above we want to use 'widen_sum' in the loop, but 'plus' in the
4230 2. The type (mode) we use to check available target support
4231 for the vector operation to be created in the *epilog*, is
4232 determined by the type of the reduction variable (in the example
4233 above we'd check this: optab_handler (plus_optab, vect_int_mode])).
4234 However the type (mode) we use to check available target support
4235 for the vector operation to be created *inside the loop*, is
4236 determined by the type of the other arguments to STMT (in the
4237 example we'd check this: optab_handler (widen_sum_optab,
4240 This is contrary to "regular" reductions, in which the types of all
4241 the arguments are the same as the type of the reduction variable.
4242 For "regular" reductions we can therefore use the same vector type
4243 (and also the same tree-code) when generating the epilog code and
4244 when generating the code inside the loop. */
4248 /* This is a reduction pattern: get the vectype from the type of the
4249 reduction variable, and get the tree-code from orig_stmt. */
4250 orig_code = gimple_assign_rhs_code (orig_stmt);
4251 gcc_assert (vectype_out);
4252 vec_mode = TYPE_MODE (vectype_out);
4256 /* Regular reduction: use the same vectype and tree-code as used for
4257 the vector code inside the loop can be used for the epilog code. */
4263 def_bb = gimple_bb (reduc_def_stmt);
4264 def_stmt_loop = def_bb->loop_father;
4265 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4266 loop_preheader_edge (def_stmt_loop));
4267 if (TREE_CODE (def_arg) == SSA_NAME
4268 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg))
4269 && gimple_code (def_arg_stmt) == GIMPLE_PHI
4270 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt))
4271 && vinfo_for_stmt (def_arg_stmt)
4272 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
4273 == vect_double_reduction_def)
4274 double_reduc = true;
4277 epilog_reduc_code = ERROR_MARK;
4278 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
4280 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
4284 if (vect_print_dump_info (REPORT_DETAILS))
4285 fprintf (vect_dump, "no optab for reduction.");
4287 epilog_reduc_code = ERROR_MARK;
4291 && optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
4293 if (vect_print_dump_info (REPORT_DETAILS))
4294 fprintf (vect_dump, "reduc op not supported by target.");
4296 epilog_reduc_code = ERROR_MARK;
4301 if (!nested_cycle || double_reduc)
4303 if (vect_print_dump_info (REPORT_DETAILS))
4304 fprintf (vect_dump, "no reduc code for scalar code.");
4310 if (double_reduc && ncopies > 1)
4312 if (vect_print_dump_info (REPORT_DETAILS))
4313 fprintf (vect_dump, "multiple types in double reduction");
4318 if (!vec_stmt) /* transformation not required. */
4320 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
4321 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
4328 if (vect_print_dump_info (REPORT_DETAILS))
4329 fprintf (vect_dump, "transform reduction.");
4331 /* FORNOW: Multiple types are not supported for condition. */
4332 if (code == COND_EXPR)
4333 gcc_assert (ncopies == 1);
4335 /* Create the destination vector */
4336 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4338 /* In case the vectorization factor (VF) is bigger than the number
4339 of elements that we can fit in a vectype (nunits), we have to generate
4340 more than one vector stmt - i.e - we need to "unroll" the
4341 vector stmt by a factor VF/nunits. For more details see documentation
4342 in vectorizable_operation. */
4344 /* If the reduction is used in an outer loop we need to generate
4345 VF intermediate results, like so (e.g. for ncopies=2):
4350 (i.e. we generate VF results in 2 registers).
4351 In this case we have a separate def-use cycle for each copy, and therefore
4352 for each copy we get the vector def for the reduction variable from the
4353 respective phi node created for this copy.
4355 Otherwise (the reduction is unused in the loop nest), we can combine
4356 together intermediate results, like so (e.g. for ncopies=2):
4360 (i.e. we generate VF/2 results in a single register).
4361 In this case for each copy we get the vector def for the reduction variable
4362 from the vectorized reduction operation generated in the previous iteration.
4365 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope)
4367 single_defuse_cycle = true;
4371 epilog_copies = ncopies;
4373 prev_stmt_info = NULL;
4374 prev_phi_info = NULL;
4377 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4378 gcc_assert (TYPE_VECTOR_SUBPARTS (vectype_out)
4379 == TYPE_VECTOR_SUBPARTS (vectype_in));
4384 vec_oprnds0 = VEC_alloc (tree, heap, 1);
4385 if (op_type == ternary_op)
4386 vec_oprnds1 = VEC_alloc (tree, heap, 1);
4389 phis = VEC_alloc (gimple, heap, vec_num);
4390 vect_defs = VEC_alloc (tree, heap, vec_num);
4392 VEC_quick_push (tree, vect_defs, NULL_TREE);
4394 for (j = 0; j < ncopies; j++)
4396 if (j == 0 || !single_defuse_cycle)
4398 for (i = 0; i < vec_num; i++)
4400 /* Create the reduction-phi that defines the reduction
4402 new_phi = create_phi_node (vec_dest, loop->header);
4403 set_vinfo_for_stmt (new_phi,
4404 new_stmt_vec_info (new_phi, loop_vinfo,
4406 if (j == 0 || slp_node)
4407 VEC_quick_push (gimple, phis, new_phi);
4411 if (code == COND_EXPR)
4413 gcc_assert (!slp_node);
4414 vectorizable_condition (stmt, gsi, vec_stmt,
4415 PHI_RESULT (VEC_index (gimple, phis, 0)),
4417 /* Multiple types are not supported for condition. */
4424 tree op0, op1 = NULL_TREE;
4426 op0 = ops[!reduc_index];
4427 if (op_type == ternary_op)
4429 if (reduc_index == 0)
4436 vect_get_slp_defs (op0, op1, slp_node, &vec_oprnds0, &vec_oprnds1,
4440 loop_vec_def0 = vect_get_vec_def_for_operand (ops[!reduc_index],
4442 VEC_quick_push (tree, vec_oprnds0, loop_vec_def0);
4443 if (op_type == ternary_op)
4445 loop_vec_def1 = vect_get_vec_def_for_operand (op1, stmt,
4447 VEC_quick_push (tree, vec_oprnds1, loop_vec_def1);
4455 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
4456 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
4457 VEC_replace (tree, vec_oprnds0, 0, loop_vec_def0);
4458 if (op_type == ternary_op)
4460 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt,
4462 VEC_replace (tree, vec_oprnds1, 0, loop_vec_def1);
4466 if (single_defuse_cycle)
4467 reduc_def = gimple_assign_lhs (new_stmt);
4469 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
4472 FOR_EACH_VEC_ELT (tree, vec_oprnds0, i, def0)
4475 reduc_def = PHI_RESULT (VEC_index (gimple, phis, i));
4478 if (!single_defuse_cycle || j == 0)
4479 reduc_def = PHI_RESULT (new_phi);
4482 def1 = ((op_type == ternary_op)
4483 ? VEC_index (tree, vec_oprnds1, i) : NULL);
4484 if (op_type == binary_op)
4486 if (reduc_index == 0)
4487 expr = build2 (code, vectype_out, reduc_def, def0);
4489 expr = build2 (code, vectype_out, def0, reduc_def);
4493 if (reduc_index == 0)
4494 expr = build3 (code, vectype_out, reduc_def, def0, def1);
4497 if (reduc_index == 1)
4498 expr = build3 (code, vectype_out, def0, reduc_def, def1);
4500 expr = build3 (code, vectype_out, def0, def1, reduc_def);
4504 new_stmt = gimple_build_assign (vec_dest, expr);
4505 new_temp = make_ssa_name (vec_dest, new_stmt);
4506 gimple_assign_set_lhs (new_stmt, new_temp);
4507 vect_finish_stmt_generation (stmt, new_stmt, gsi);
4510 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4511 VEC_quick_push (tree, vect_defs, new_temp);
4514 VEC_replace (tree, vect_defs, 0, new_temp);
4521 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4523 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4525 prev_stmt_info = vinfo_for_stmt (new_stmt);
4526 prev_phi_info = vinfo_for_stmt (new_phi);
4529 /* Finalize the reduction-phi (set its arguments) and create the
4530 epilog reduction code. */
4531 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
4533 new_temp = gimple_assign_lhs (*vec_stmt);
4534 VEC_replace (tree, vect_defs, 0, new_temp);
4537 vect_create_epilog_for_reduction (vect_defs, stmt, epilog_copies,
4538 epilog_reduc_code, phis, reduc_index,
4539 double_reduc, slp_node);
4541 VEC_free (gimple, heap, phis);
4542 VEC_free (tree, heap, vec_oprnds0);
4544 VEC_free (tree, heap, vec_oprnds1);
4549 /* Function vect_min_worthwhile_factor.
4551 For a loop where we could vectorize the operation indicated by CODE,
4552 return the minimum vectorization factor that makes it worthwhile
4553 to use generic vectors. */
4555 vect_min_worthwhile_factor (enum tree_code code)
4576 /* Function vectorizable_induction
4578 Check if PHI performs an induction computation that can be vectorized.
4579 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
4580 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
4581 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4584 vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
4587 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
4588 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4589 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4590 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4591 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4592 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4595 gcc_assert (ncopies >= 1);
4596 /* FORNOW. This restriction should be relaxed. */
4597 if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
4599 if (vect_print_dump_info (REPORT_DETAILS))
4600 fprintf (vect_dump, "multiple types in nested loop.");
4604 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4607 /* FORNOW: SLP not supported. */
4608 if (STMT_SLP_TYPE (stmt_info))
4611 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
4613 if (gimple_code (phi) != GIMPLE_PHI)
4616 if (!vec_stmt) /* transformation not required. */
4618 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
4619 if (vect_print_dump_info (REPORT_DETAILS))
4620 fprintf (vect_dump, "=== vectorizable_induction ===");
4621 vect_model_induction_cost (stmt_info, ncopies);
4627 if (vect_print_dump_info (REPORT_DETAILS))
4628 fprintf (vect_dump, "transform induction phi.");
4630 vec_def = get_initial_def_for_induction (phi);
4631 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
4635 /* Function vectorizable_live_operation.
4637 STMT computes a value that is used outside the loop. Check if
4638 it can be supported. */
4641 vectorizable_live_operation (gimple stmt,
4642 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
4643 gimple *vec_stmt ATTRIBUTE_UNUSED)
4645 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4646 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4647 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4653 enum vect_def_type dt;
4654 enum tree_code code;
4655 enum gimple_rhs_class rhs_class;
4657 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
4659 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4662 if (!is_gimple_assign (stmt))
4665 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
4668 /* FORNOW. CHECKME. */
4669 if (nested_in_vect_loop_p (loop, stmt))
4672 code = gimple_assign_rhs_code (stmt);
4673 op_type = TREE_CODE_LENGTH (code);
4674 rhs_class = get_gimple_rhs_class (code);
4675 gcc_assert (rhs_class != GIMPLE_UNARY_RHS || op_type == unary_op);
4676 gcc_assert (rhs_class != GIMPLE_BINARY_RHS || op_type == binary_op);
4678 /* FORNOW: support only if all uses are invariant. This means
4679 that the scalar operations can remain in place, unvectorized.
4680 The original last scalar value that they compute will be used. */
4682 for (i = 0; i < op_type; i++)
4684 if (rhs_class == GIMPLE_SINGLE_RHS)
4685 op = TREE_OPERAND (gimple_op (stmt, 1), i);
4687 op = gimple_op (stmt, i + 1);
4689 && !vect_is_simple_use (op, loop_vinfo, NULL, &def_stmt, &def, &dt))
4691 if (vect_print_dump_info (REPORT_DETAILS))
4692 fprintf (vect_dump, "use not simple.");
4696 if (dt != vect_external_def && dt != vect_constant_def)
4700 /* No transformation is required for the cases we currently support. */
4704 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */
4707 vect_loop_kill_debug_uses (struct loop *loop, gimple stmt)
4709 ssa_op_iter op_iter;
4710 imm_use_iterator imm_iter;
4711 def_operand_p def_p;
4714 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
4716 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
4720 if (!is_gimple_debug (ustmt))
4723 bb = gimple_bb (ustmt);
4725 if (!flow_bb_inside_loop_p (loop, bb))
4727 if (gimple_debug_bind_p (ustmt))
4729 if (vect_print_dump_info (REPORT_DETAILS))
4730 fprintf (vect_dump, "killing debug use");
4732 gimple_debug_bind_reset_value (ustmt);
4733 update_stmt (ustmt);
4742 /* Function vect_transform_loop.
4744 The analysis phase has determined that the loop is vectorizable.
4745 Vectorize the loop - created vectorized stmts to replace the scalar
4746 stmts in the loop, and update the loop exit condition. */
4749 vect_transform_loop (loop_vec_info loop_vinfo)
4751 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4752 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4753 int nbbs = loop->num_nodes;
4754 gimple_stmt_iterator si;
4757 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
4759 bool slp_scheduled = false;
4760 unsigned int nunits;
4761 tree cond_expr = NULL_TREE;
4762 gimple_seq cond_expr_stmt_list = NULL;
4763 bool do_peeling_for_loop_bound;
4765 if (vect_print_dump_info (REPORT_DETAILS))
4766 fprintf (vect_dump, "=== vec_transform_loop ===");
4768 /* Peel the loop if there are data refs with unknown alignment.
4769 Only one data ref with unknown store is allowed. */
4771 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
4772 vect_do_peeling_for_alignment (loop_vinfo);
4774 do_peeling_for_loop_bound
4775 = (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4776 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
4777 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0));
4779 if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
4780 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
4781 vect_loop_versioning (loop_vinfo,
4782 !do_peeling_for_loop_bound,
4783 &cond_expr, &cond_expr_stmt_list);
4785 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
4786 compile time constant), or it is a constant that doesn't divide by the
4787 vectorization factor, then an epilog loop needs to be created.
4788 We therefore duplicate the loop: the original loop will be vectorized,
4789 and will compute the first (n/VF) iterations. The second copy of the loop
4790 will remain scalar and will compute the remaining (n%VF) iterations.
4791 (VF is the vectorization factor). */
4793 if (do_peeling_for_loop_bound)
4794 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio,
4795 cond_expr, cond_expr_stmt_list);
4797 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
4798 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
4800 /* 1) Make sure the loop header has exactly two entries
4801 2) Make sure we have a preheader basic block. */
4803 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
4805 split_edge (loop_preheader_edge (loop));
4807 /* FORNOW: the vectorizer supports only loops which body consist
4808 of one basic block (header + empty latch). When the vectorizer will
4809 support more involved loop forms, the order by which the BBs are
4810 traversed need to be reconsidered. */
4812 for (i = 0; i < nbbs; i++)
4814 basic_block bb = bbs[i];
4815 stmt_vec_info stmt_info;
4818 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
4820 phi = gsi_stmt (si);
4821 if (vect_print_dump_info (REPORT_DETAILS))
4823 fprintf (vect_dump, "------>vectorizing phi: ");
4824 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
4826 stmt_info = vinfo_for_stmt (phi);
4830 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
4831 vect_loop_kill_debug_uses (loop, phi);
4833 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4834 && !STMT_VINFO_LIVE_P (stmt_info))
4837 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
4838 != (unsigned HOST_WIDE_INT) vectorization_factor)
4839 && vect_print_dump_info (REPORT_DETAILS))
4840 fprintf (vect_dump, "multiple-types.");
4842 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
4844 if (vect_print_dump_info (REPORT_DETAILS))
4845 fprintf (vect_dump, "transform phi.");
4846 vect_transform_stmt (phi, NULL, NULL, NULL, NULL);
4850 for (si = gsi_start_bb (bb); !gsi_end_p (si);)
4852 gimple stmt = gsi_stmt (si);
4855 if (vect_print_dump_info (REPORT_DETAILS))
4857 fprintf (vect_dump, "------>vectorizing statement: ");
4858 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
4861 stmt_info = vinfo_for_stmt (stmt);
4863 /* vector stmts created in the outer-loop during vectorization of
4864 stmts in an inner-loop may not have a stmt_info, and do not
4865 need to be vectorized. */
4872 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
4873 vect_loop_kill_debug_uses (loop, stmt);
4875 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4876 && !STMT_VINFO_LIVE_P (stmt_info))
4882 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
4884 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
4885 if (!STMT_SLP_TYPE (stmt_info)
4886 && nunits != (unsigned int) vectorization_factor
4887 && vect_print_dump_info (REPORT_DETAILS))
4888 /* For SLP VF is set according to unrolling factor, and not to
4889 vector size, hence for SLP this print is not valid. */
4890 fprintf (vect_dump, "multiple-types.");
4892 /* SLP. Schedule all the SLP instances when the first SLP stmt is
4894 if (STMT_SLP_TYPE (stmt_info))
4898 slp_scheduled = true;
4900 if (vect_print_dump_info (REPORT_DETAILS))
4901 fprintf (vect_dump, "=== scheduling SLP instances ===");
4903 vect_schedule_slp (loop_vinfo, NULL);
4906 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
4907 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
4914 /* -------- vectorize statement ------------ */
4915 if (vect_print_dump_info (REPORT_DETAILS))
4916 fprintf (vect_dump, "transform statement.");
4918 strided_store = false;
4919 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL, NULL);
4922 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4924 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
4925 interleaving chain was completed - free all the stores in
4927 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
4928 gsi_remove (&si, true);
4933 /* Free the attached stmt_vec_info and remove the stmt. */
4934 free_stmt_vec_info (stmt);
4935 gsi_remove (&si, true);
4943 slpeel_make_loop_iterate_ntimes (loop, ratio);
4945 /* The memory tags and pointers in vectorized statements need to
4946 have their SSA forms updated. FIXME, why can't this be delayed
4947 until all the loops have been transformed? */
4948 update_ssa (TODO_update_ssa);
4950 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
4951 fprintf (vect_dump, "LOOP VECTORIZED.");
4952 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
4953 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");