/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "params.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"

static bool vect_can_advance_ivs_p (loop_vec_info);
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different from the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
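
/* For illustration (hypothetical types and sizes): given the promotion
   stmt 'int_x = (int) short_y', the lhs size is 4 bytes and the rhs size
   is 2 bytes, so the function below returns 'short' - the smallest scalar
   type that participates in the stmt.  */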

static tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}

/* Function vect_determine_vectorization_factor

   Determine the vectorization factor (VF).  VF is the number of data elements
   that are operated upon in parallel in a single iteration of the vectorized
   loop.  For example, when vectorizing a loop that operates on 4-byte
   elements, on a target with vector size (VS) 16 bytes, the VF is set to 4,
   since 4 elements can fit in a single vector register.

   We currently support vectorization of loops in which all types operated
   upon are of the same size.  Therefore this function currently sets VF
   according to the size of the types operated upon, and fails if there are
   multiple sizes in the loop.

   VF is also the factor by which the loop iterations are strip-mined, e.g.:
   original loop:
        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   vectorized loop:
        for (i=0; i<N; i+=VF){
          a[i:VF] = b[i:VF] + c[i:VF];
        }
*/
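
/* A worked instance of the computation above (hypothetical target with
   16-byte vector registers): for a loop operating on 2-byte 'short'
   elements, nunits = 16/2 = 8 per statement, so the function below sets
   VF = 8 and the loop iterations are strip-mined by 8.  */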

static bool
vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  unsigned int nunits;
  tree scalar_type;
  gimple phi;
  tree vectype;
  stmt_vec_info stmt_info;
  int i;
  HOST_WIDE_INT dummy;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
              scalar_type = TREE_TYPE (PHI_RESULT (phi));

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;

              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "vectype: ");
                  print_generic_expr (vect_dump, vectype, TDF_SLIM);
                }

              nunits = TYPE_VECTOR_SUBPARTS (vectype);
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "nunits = %d", nunits);

              if (!vectorization_factor
                  || (nunits > vectorization_factor))
                vectorization_factor = nunits;
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_info = vinfo_for_stmt (stmt);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.  */
          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "skip.");
              continue;
            }

          if (gimple_get_lhs (stmt) == NULL_TREE)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: irregular stmt.");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: vector stmt in loop:");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (STMT_VINFO_VECTYPE (stmt_info))
            {
              /* The only case when a vectype had been already set is for stmts
                 that contain a dataref, or for "pattern-stmts" (stmts generated
                 by the vectorizer to represent/replace a certain idiom).  */
              gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
                          || is_pattern_stmt_p (stmt_info));
              vectype = STMT_VINFO_VECTYPE (stmt_info);
            }
          else
            {
              gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
                          && !is_pattern_stmt_p (stmt_info));

              scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
                                                           &dummy);
              if (vect_print_dump_info (REPORT_DETAILS))
                {
                  fprintf (vect_dump, "get vectype for scalar type: ");
                  print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                }

              vectype = get_vectype_for_scalar_type (scalar_type);
              if (!vectype)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: unsupported data-type ");
                      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
                    }
                  return false;
                }
              STMT_VINFO_VECTYPE (stmt_info) = vectype;
            }

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "vectype: ");
              print_generic_expr (vect_dump, vectype, TDF_SLIM);
            }

          nunits = TYPE_VECTOR_SUBPARTS (vectype);
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "nunits = %d", nunits);

          if (!vectorization_factor
              || (nunits > vectorization_factor))
            vectorization_factor = nunits;
        }
    }

  /* TODO: Analyze cost.  Decide if worthwhile to vectorize.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
  if (vectorization_factor <= 1)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported data-type");
      return false;
    }
  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  return true;
}

/* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
   the number of created vector stmts depends on the unrolling factor).
   However, the actual number of vector stmts for every SLP node depends on
   VF, which is set later in vect_analyze_operations ().  Hence, SLP costs
   should be updated.  In this function we assume that the inside costs
   calculated in vect_model_xxx_cost are linear in ncopies.  */
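
/* For illustration (hypothetical numbers): with VF = 8 and an SLP instance
   whose unrolling factor is 2, each inside-of-loop cost recorded by
   vect_model_xxx_cost is scaled by 8/2 = 4 below, matching the number of
   vector stmts that will actually be created for each node.  */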

static void
vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
{
  unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    /* We assume that costs are linear in ncopies.  */
    SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
      / SLP_INSTANCE_UNROLLING_FACTOR (instance);
}

/* Function vect_analyze_operations.

   Scan the loop stmts and make sure they are all vectorizable.  */

static bool
vect_analyze_operations (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int vectorization_factor = 0;
  int i;
  bool ok;
  gimple phi;
  stmt_vec_info stmt_info;
  bool need_to_vectorize = false;
  int min_profitable_iters;
  int min_scalar_loop_bound;
  unsigned int th;
  bool only_slp_in_loop = true;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_operations ===");

  gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          ok = true;

          stmt_info = vinfo_for_stmt (phi);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "examining phi: ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (! is_loop_header_bb_p (bb))
            {
              /* inner-loop loop-closed exit phi in outer-loop vectorization
                 (i.e. a phi in the tail of the outer-loop).
                 FORNOW: we currently don't support the case that these phis
                 are not used in the outer-loop, because this case would
                 require us to actually do something here.  */
              if (!STMT_VINFO_RELEVANT_P (stmt_info)
                  || STMT_VINFO_LIVE_P (stmt_info))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "Unsupported loop-closed phi in outer-loop.");
                  return false;
                }
              continue;
            }

          gcc_assert (stmt_info);

          if (STMT_VINFO_LIVE_P (stmt_info))
            {
              /* FORNOW: not yet supported.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: value used after loop.");
              return false;
            }

          if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
              && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
            {
              /* A scalar-dependence cycle that we don't support.  */
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
              return false;
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              need_to_vectorize = true;
              if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
                ok = vectorizable_induction (phi, NULL, NULL);

              if (!ok)
                {
                  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                    {
                      fprintf (vect_dump,
                               "not vectorized: relevant phi not supported: ");
                      print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
                    }
                  return false;
                }
            }
        }

      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          enum vect_relevant relevance = STMT_VINFO_RELEVANT (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "==> examining statement: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          gcc_assert (stmt_info);

          /* skip stmts which do not need to be vectorized.
             this is expected to include:
             - the COND_EXPR which is the loop exit condition
             - any LABEL_EXPRs in the loop
             - computations that are used only for array indexing or loop
               control.  */

          if (!STMT_VINFO_RELEVANT_P (stmt_info)
              && !STMT_VINFO_LIVE_P (stmt_info))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "irrelevant.");
              continue;
            }

          switch (STMT_VINFO_DEF_TYPE (stmt_info))
            {
            case vect_loop_def:
              break;

            case vect_reduction_def:
              gcc_assert (relevance == vect_used_in_outer
                          || relevance == vect_used_in_outer_by_reduction
                          || relevance == vect_unused_in_loop);
              break;

            case vect_induction_def:
            case vect_constant_def:
            case vect_invariant_def:
            case vect_unknown_def_type:
            default:
              gcc_unreachable ();
            }

          if (STMT_VINFO_RELEVANT_P (stmt_info))
            {
              gcc_assert (!VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))));
              gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
              need_to_vectorize = true;
            }

          ok = true;
          if (STMT_VINFO_RELEVANT_P (stmt_info)
              || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
            ok = (vectorizable_type_promotion (stmt, NULL, NULL, NULL)
                  || vectorizable_type_demotion (stmt, NULL, NULL, NULL)
                  || vectorizable_conversion (stmt, NULL, NULL, NULL)
                  || vectorizable_operation (stmt, NULL, NULL, NULL)
                  || vectorizable_assignment (stmt, NULL, NULL, NULL)
                  || vectorizable_load (stmt, NULL, NULL, NULL)
                  || vectorizable_call (stmt, NULL, NULL)
                  || vectorizable_store (stmt, NULL, NULL, NULL)
                  || vectorizable_condition (stmt, NULL, NULL)
                  || vectorizable_reduction (stmt, NULL, NULL));

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: relevant stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
             need extra handling, except for vectorizable reductions.  */
          if (STMT_VINFO_LIVE_P (stmt_info)
              && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
            ok = vectorizable_live_operation (stmt, NULL, NULL);

          if (!ok)
            {
              if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
                {
                  fprintf (vect_dump, "not vectorized: live stmt not ");
                  fprintf (vect_dump, "supported: ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (!PURE_SLP_STMT (stmt_info))
            {
              /* STMT needs loop-based vectorization.  */
              only_slp_in_loop = false;

              /* Groups of strided accesses whose size is not a power of 2 are
                 not vectorizable yet using loop-vectorization.  Therefore, if
                 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
                 both SLPed and loop-based vectorized), the loop cannot be
                 vectorized.  */
              if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
                                     DR_GROUP_FIRST_DR (stmt_info)))) == -1)
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "not vectorized: the size of group "
                               "of strided accesses is not a power of 2");
                      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                    }
                  return false;
                }
            }
        }
    }

  /* All operations in the loop are either irrelevant (deal with loop
     control, or dead), or only used outside the loop and can be moved
     out of the loop (e.g. invariants, inductions).  The loop can be
     optimized away by scalar optimizations.  We're better off not
     touching this loop.  */
  if (!need_to_vectorize)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump,
                 "All the computation can be taken out of the loop.");
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: redundant loop. no profit to vectorize.");
      return false;
    }

  /* If all the stmts in the loop can be SLPed, we perform only SLP, and the
     vectorization factor of the loop is the unrolling factor required by the
     SLP instances.  If that unrolling factor is 1, we say that we perform
     pure SLP on the loop - cross iteration parallelism is not exploited.  */
  if (only_slp_in_loop)
    vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
  else
    vectorization_factor = least_common_multiple (vectorization_factor,
                                LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));

  LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump,
             "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
             vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: iteration count too small.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "vectorization factor.");
      return false;
    }

  /* Analyze cost.  Decide if worthwhile to vectorize.  */

  /* Once VF is set, SLP costs should be updated since the number of created
     vector stmts depends on VF.  */
  vect_update_slp_costs_according_to_vf (loop_vinfo);

  min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
  LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;

  if (min_profitable_iters < 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: vector version will never be "
                 "profitable.");
      return false;
    }

  min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
                            * vectorization_factor) - 1);

  /* Use the cost model only if it is more conservative than the
     user-specified threshold.  */

  th = (unsigned) min_scalar_loop_bound;
  if (min_profitable_iters
      && (!min_scalar_loop_bound
          || min_profitable_iters > min_scalar_loop_bound))
    th = (unsigned) min_profitable_iters;

  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: vectorization not "
                 "profitable.");
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not vectorized: iteration count smaller than "
                 "user specified loop bound parameter or minimum "
                 "profitable iterations (whichever is more conservative).");
      return false;
    }

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
      || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
      || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "epilog loop required.");
      if (!vect_can_advance_ivs_p (loop_vinfo))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 1.");
          return false;
        }
      if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump,
                     "not vectorized: can't create epilog loop 2.");
          return false;
        }
    }

  return true;
}

/* Function exist_non_indexing_operands_for_use_p

   USE is one of the uses attached to STMT.  Check if USE is
   used in STMT for anything other than indexing an array.  */
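
/* For illustration (hypothetical gimple): in the store 'a[i] = x', the
   use 'x' is a non-indexing operand (the function returns true for it),
   whereas the use 'i' only serves to index 'a' (the function returns
   false for it).  */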

static bool
exist_non_indexing_operands_for_use_p (tree use, gimple stmt)
{
  tree operand;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  /* USE corresponds to some operand in STMT.  If there is no data
     reference in STMT, then any operand that corresponds to USE
     is not indexing an array.  */
  if (!STMT_VINFO_DATA_REF (stmt_info))
    return true;

  /* STMT has a data_ref.  FORNOW this means that it's of one of
     the forms:
     -1- ARRAY_REF = var
     -2- var = ARRAY_REF
     (This should have been verified in analyze_data_refs).

     'var' in the second case corresponds to a def, not a use,
     so USE cannot correspond to any operands that are not used
     for array indexing.

     Therefore, all we need to check is if STMT falls into the
     first case, and whether var corresponds to USE.  */

  if (TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME)
    return false;

  if (!gimple_assign_copy_p (stmt))
    return false;
  operand = gimple_assign_rhs1 (stmt);

  if (TREE_CODE (operand) != SSA_NAME)
    return false;

  if (operand == use)
    return true;

  return false;
}

/* Function vect_analyze_scalar_cycles_1.

   Examine the cross iteration def-use cycles of scalar variables
   in LOOP.  LOOP_VINFO represents the loop that is now being
   considered for vectorization (can be LOOP, or an outer-loop
   enclosing LOOP).  */

static void
vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
{
  basic_block bb = loop->header;
  tree dumy;
  VEC(gimple,heap) *worklist = VEC_alloc (gimple, heap, 64);
  gimple_stmt_iterator gsi;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");

  /* First - identify all inductions.  */
  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree access_fn = NULL;
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      /* Skip virtual phi's.  The data dependences that are associated with
         virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */
      if (!is_gimple_reg (SSA_NAME_VAR (def)))
        continue;

      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;

      /* Analyze the evolution function.  */
      access_fn = analyze_scalar_evolution (loop, def);
      if (access_fn && vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Access function of PHI: ");
          print_generic_expr (vect_dump, access_fn, TDF_SLIM);
        }

      if (!access_fn
          || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
        {
          VEC_safe_push (gimple, heap, worklist, phi);
          continue;
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected induction.");
      STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
    }

  /* Second - identify all reductions.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      gimple phi = VEC_pop (gimple, worklist);
      tree def = PHI_RESULT (phi);
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
      gimple reduc_stmt;

      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "Analyze phi: ");
          print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
        }

      gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);

      reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
      if (reduc_stmt)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Detected reduction.");
          STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
          STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
                                                        vect_reduction_def;
        }
      else
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "Unknown def-use cycle pattern.");
    }

  VEC_free (gimple, heap, worklist);
}

/* Function vect_analyze_scalar_cycles.

   Examine the cross iteration def-use cycles of scalar variables, by
   analyzing the loop-header PHIs of scalar variables; Classify each
   cycle as one of the following: invariant, induction, reduction, unknown.
   We do that for the loop represented by LOOP_VINFO, and also for its
   inner-loop, if it exists.

   Examples for scalar cycles:

   Example1: reduction:

              loop1:
              for (i=0; i<N; i++)
                 sum += a[i];

   Example2: induction:

              loop2:
              for (i=0; i<N; i++)
                 a[i] = i;  */

static void
vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  vect_analyze_scalar_cycles_1 (loop_vinfo, loop);

  /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
     Reductions in such inner-loop therefore have different properties than
     the reductions in the nest that gets vectorized:
     1. When vectorized, they are executed in the same order as in the original
        scalar loop, so we can't change the order of computation when
        vectorizing them.
     2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
        current checks are too strict.  */

  if (loop->inner)
    vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
}

/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
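
/* For illustration (hypothetical inits): inserting DRA with init 8 into a
   chain whose members have inits {0, 4, 12} walks past 0 and 4, stops at
   the first init greater than 8, and yields the chain {0, 4, 8, 12}.  */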

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}

/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
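
/* For illustration of case 4 (hypothetical inits): merging the chains
   {0, 8} and {4, 12} keeps the chain whose FIRST_DR has the smaller init
   (here 0) and inserts the other chain's nodes by init order, giving
   {0, 4, 8, 12}.  */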

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                          old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}

/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}

/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */
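
/* For illustration (hypothetical 4-byte accesses): for the stores
   'a[2*i] = x' and 'a[2*i+1] = y', both DRs have the same base and step 8,
   inits 0 and 4, and (4 - 0) % 4 == 0 with 4 < 8, so the two accesses are
   recognized as an interleaving pair.  */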

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step is greater than the difference between data-refs' inits  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}

/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_size)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When the dependence can instead be
   checked at run-time by versioning for alias, return FALSE.  */
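
/* For illustration (hypothetical distances, assuming VF = 4 and equal-sized
   refs): distance 8 satisfies 'dist % vectorization_factor == 0' below and
   only records that the refs share the same alignment; distance 5 is
   >= VF and is harmless; distance 3 is the case that makes the loop
   unvectorizable.  */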

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

static bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
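
/* For illustration (hypothetical numbers): for a 16-byte vector type and
   an access whose base is known to be 16-byte aligned with DR_INIT = 4,
   the computation below yields misalign = 4 % 16 = 4 bytes, and
   DR_MISALIGNMENT (DR) is set to 4.  */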

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     evenly divisible by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
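
/* For illustration (hypothetical numbers): with 16-byte vectors, 4-byte
   elements, DR_MISALIGNMENT (DR) = 8 and NPEEL = 2, the update below
   computes (8 + 2*4) % 16 = 0, i.e. DR also becomes aligned by the
   peel.  */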

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

static bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */
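
/* For illustration (hypothetical V4SI access): with 4 elements per vector
   and a known misalignment of 2 elements, peeling 4 - 2 = 2 iterations
   reaches natural alignment; for an interleaved group of size 3, that
   prolog count is not a multiple of the group size, so peeling is
   rejected below.  */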

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; Any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:
   -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

   -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

   -- Possibility 1: we do loop versioning:
   if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
   }
   else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
   }

   -- Possibility 2: we do loop peeling:
   for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
   }
   for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
   }

   -- Possibility 3: combination of loop peeling and versioning:
   for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
   }
   if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
   }
   else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
   }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */

static bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
        {
          do_peeling = vector_alignment_reachable_p (dr);
          if (do_peeling)
            dr0 = dr;
          if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector alignment may not be reachable");
          break;
        }
    }

  vect_versioning_for_alias_required =
    (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;
      gimple stmt = DR_STMT (dr0);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of iterations
             in the peeled loop (the peeling factor) for use in updating
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
             factor minus the misalignment as an element count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = nelements - mis;
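
          /* For illustration (hypothetical numbers): for V4SI and a 4-byte
             misalignment, mis = 1 element, so npeel = 4 - 1 = 3, matching
             the 3-iteration scalar prolog in the comment above.  */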
1899 /* For interleaved data access every iteration accesses all the
1900 members of the group, therefore we divide the number of iterations
1901 by the group size. */
1902 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1903 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1904 npeel /= DR_GROUP_SIZE (stmt_info);
1906 if (vect_print_dump_info (REPORT_DETAILS))
1907 fprintf (vect_dump, "Try peeling by %d", npeel);
      /* Ensure that all data refs can be vectorized after the peel.  */
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then
             set DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of
             DR_i and dr0 are known at compile time then increment
             DR_MISALIGNMENT (DR_i) by the peeling factor times the element
             size of DR_i (MOD the vectorization factor times the size).
             Otherwise, the misalignment of DR_i must be set to unknown.  */
          for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo);
          gcc_assert (stat);
          return stat;
        }
    }
  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize_size is FALSE
     3) there is at least one unsupported misaligned data ref with an
        unknown misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
    flag_tree_vect_loop_version
    && (!optimize_size)
    && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (gimple,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (gimple, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             stmt);
            }
        }
      /* Versioning requires at least one misaligned data reference.  */
      if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(gimple,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");
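      /* Illustrative sketch (not from the original sources): with
         LOOP_VINFO_PTR_MASK = 0xf, versioning guards the vector loop
         with a runtime alignment test of the recorded addresses,
         conceptually:

           if ((((uintptr_t) p_a | (uintptr_t) p_b) & 0xf) == 0)
             ...vectorized loop, these DRs treated as aligned...
           else
             ...original scalar loop...                              */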
      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  return true;
}
/* Analyze groups of strided accesses: check that DR belongs to a group of
   strided accesses of legal size, step, etc.  Detect gaps, single element
   interleaving, and other special cases.  Set strided access info.
   Collect groups of strided stores for further use in SLP analysis.  */

static bool
vect_analyze_group_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  HOST_WIDE_INT stride;
  bool slp_impossible = false;

  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size
     of the interleaving group (including gaps).  */
  stride = dr_step / type_size;
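  /* Illustration (not from the original sources): in

       for (i = 0; i < n; i++)
         {
           x += a[2*i];       // DR_STEP = 8, type_size = 4
           y += a[2*i + 1];   // DR_STEP = 8, type_size = 4
         }

     each load advances by 8 bytes per iteration over 4-byte elements,
     so STRIDE = 8 / 4 = 2: the two loads form an interleaving group of
     size 2 with no gaps.  */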
  /* A non-consecutive access is possible only if it is part of an
     interleaving group.  */
  if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    {
      /* Check whether this DR is part of interleaving, and is a single
         element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the
         type size.  The size of the group must be a power of 2.  */
      if (DR_IS_READ (dr)
          && (dr_step % type_size) == 0
          && stride > 0
          && exact_log2 (stride) != -1)
        {
          DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
          DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected single element interleaving %d ",
                       DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
              fprintf (vect_dump, " step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
            }
          return true;
        }
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }
  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree next_step;
      tree prev_init = DR_INIT (data_ref);
      gimple prev = stmt;
      HOST_WIDE_INT diff, count_in_bytes;

      while (next)
        {
          /* Skip same data-refs.  If two or more stmts share a data-ref
             (supported only for loads), we vectorize only the first stmt,
             and the rest get their vectorized loads from the first one.  */
          if (!tree_int_cst_compare (DR_INIT (data_ref),
                                     DR_INIT (STMT_VINFO_DATA_REF (
                                                  vinfo_for_stmt (next)))))
            {
              if (!DR_IS_READ (data_ref))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Two store stmts share the same dr.");
                  return false;
                }

              /* Check that there are no load-store dependences for these
                 loads, to prevent a load-store-load sequence to the same
                 location.  */
              if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
                  || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "READ_WRITE dependence in interleaving.");
                  return false;
                }

              /* For load use the same data-ref load.  */
              DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;

              prev = next;
              next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
              continue;
            }
          prev = next;
          /* Check that all the accesses have the same STEP.  */
          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (step, next_step))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "not consecutive access in interleaving");
              return false;
            }

          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
          /* Check that the distance between two accesses is equal to the
             type size.  Otherwise, we have gaps.  */
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
                  - TREE_INT_CST_LOW (prev_init)) / type_size;
          if (diff != 1)
            {
              /* FORNOW: SLP of accesses with gaps is not supported.  */
              slp_impossible = true;
              if (!DR_IS_READ (data_ref))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "interleaved store with gaps");
                  return false;
                }
            }

          /* Store the gap from the previous member of the group.  If there
             is no gap in the access, DR_GROUP_GAP is always 1.  */
          DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;

          prev_init = DR_INIT (data_ref);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
          /* Count the number of data-refs in the chain.  */
          count++;
        }
      /* COUNT is the number of accesses found; we multiply it by the size
         of the type to get COUNT_IN_BYTES.  */
      count_in_bytes = type_size * count;

      /* Check that the size of the interleaving is not greater than STEP.  */
      if (dr_step < count_in_bytes)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "interleaving size is greater than step for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Check that the size of the interleaving is equal to STEP for
         stores, i.e., that there are no gaps.  */
      if (dr_step != count_in_bytes)
        {
          if (DR_IS_READ (dr))
            {
              slp_impossible = true;
              /* There is a gap after the last load in the group.  This gap
                 is a difference between the stride and the number of
                 elements.  When there is no gap, this difference should
                 be 0.  */
              DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
            }
          else
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "interleaved store with gaps");
              return false;
            }
        }
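      /* Illustration (not from the original sources): for the loads

           ... = a[3*i];      // count = 2 accesses in the chain
           ... = a[3*i + 1];  // stride = 3 (step 12 / type_size 4)

         count_in_bytes = 8 while dr_step = 12, so there is a gap: the
         first stmt's DR_GROUP_GAP becomes stride - count = 1 (the unread
         element a[3*i + 2]).  The same pattern in stores is rejected
         above.  */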
      /* Check that STEP is a multiple of type size.  */
      if ((dr_step % type_size) != 0)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "step is not a multiple of type size: step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
              fprintf (vect_dump, " size ");
              print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
                                  TDF_SLIM);
            }
          return false;
        }

      /* FORNOW: we handle only interleaving that is a power of 2.
         We don't fail here if it may still be possible to vectorize the
         group using SLP.  If not, the size of the group will be checked in
         vect_analyze_operations, and the vectorization will fail.  */
      if (exact_log2 (stride) == -1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "interleaving is not a power of 2");

          if (slp_impossible)
            return false;
        }
      DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);

      /* SLP: create an SLP data structure for every interleaving group of
         stores for further analysis in vect_analyze_slp.  */
      if (!DR_IS_READ (dr) && !slp_impossible)
        VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
                       stmt);
    }

  return true;
}
/* Analyze the access pattern of the data-reference DR.
   In case of non-consecutive accesses call vect_analyze_group_access() to
   analyze groups of strided accesses.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

  if (!step)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data-ref access");
      return false;
    }

  /* Don't allow invariant accesses.  */
  if (dr_step == 0)
    return false;

  if (nested_in_vect_loop_p (loop, stmt))
    {
      /* Interleaved accesses are not yet supported within outer-loop
         vectorization for references in the inner-loop.  */
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      dr_step = TREE_INT_CST_LOW (step);

      if (dr_step == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "zero step in outer loop.");
          if (DR_IS_READ (dr))
            return true;
          else
            return false;
        }
    }

  /* Consecutive?  */
  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      /* Mark that it is not interleaving.  */
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
      return true;
    }

  if (nested_in_vect_loop_p (loop, stmt))
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "strided access in outer loop.");
      return false;
    }

  /* Not a consecutive access - check if it's part of an interleaving
     group.  */
  return vect_analyze_group_access (dr);
}
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
           simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_analyze_data_ref_access (dr))
      {
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
          fprintf (vect_dump, "not vectorized: complicated access pattern.");
        return false;
      }

  return true;
}
/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Return FALSE if the resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */

bool
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  VEC (ddr_p, heap) * ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  unsigned i, j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");

  for (i = 0; i < VEC_length (ddr_p, ddrs); )
    {
      bool found;
      ddr_p ddr_i;

      ddr_i = VEC_index (ddr_p, ddrs, i);
      found = false;

      for (j = 0; j < i; j++)
        {
          ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);

          if (vect_vfa_range_equal (ddr_i, ddr_j))
            {
              if (vect_print_dump_info (REPORT_DR_DETAILS))
                {
                  fprintf (vect_dump, "found equal ranges ");
                  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)),
                                      TDF_SLIM);
                  fprintf (vect_dump, ", ");
                  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)),
                                      TDF_SLIM);
                  fprintf (vect_dump, " and ");
                  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)),
                                      TDF_SLIM);
                  fprintf (vect_dump, ", ");
                  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)),
                                      TDF_SLIM);
                }
              found = true;
              break;
            }
        }

      if (found)
        {
          VEC_ordered_remove (ddr_p, ddrs, i);
          continue;
        }
      i++;
    }

  if (VEC_length (ddr_p, ddrs) >
        (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump,
                 "disable versioning for alias - max number of generated "
                 "checks exceeded.");

      VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);

      return false;
    }

  return true;
}
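/* Illustration (not from the original sources): if two ddrs test the
   same pair of address ranges - e.g. both arise from accesses to a[i]
   and b[i] of the same length - only one survives the pruning above,
   and the versioning condition contains a single overlap test,
   conceptually:

     if (a + len <= b || b + len <= a)
       ...vectorized loop...
     else
       ...scalar loop...                                             */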
/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  if (!node)
    return;

  if (SLP_TREE_LEFT (node))
    vect_free_slp_tree (SLP_TREE_LEFT (node));

  if (SLP_TREE_RIGHT (node))
    vect_free_slp_tree (SLP_TREE_RIGHT (node));

  VEC_free (gimple, heap, SLP_TREE_SCALAR_STMTS (node));

  if (SLP_TREE_VEC_STMTS (node))
    VEC_free (gimple, heap, SLP_TREE_VEC_STMTS (node));

  free (node);
}
/* Get the defs for the rhs of STMT (collect them in DEF_STMTS0/1), check
   that they are of a legal type and that they match the defs of the first
   stmt of the SLP group (stored in FIRST_STMT_...).  */

static bool
vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
                             gimple stmt, VEC (gimple, heap) **def_stmts0,
                             VEC (gimple, heap) **def_stmts1,
                             enum vect_def_type *first_stmt_dt0,
                             enum vect_def_type *first_stmt_dt1,
                             tree *first_stmt_def0_type,
                             tree *first_stmt_def1_type,
                             tree *first_stmt_const_oprnd,
                             int ncopies_for_cost,
                             bool *pattern0, bool *pattern1)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  stmt_vec_info stmt_info =
    vinfo_for_stmt (VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0));
  enum gimple_rhs_class rhs_class;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  rhs_class = get_gimple_rhs_class (gimple_assign_rhs_code (stmt));
  number_of_oprnds = gimple_num_ops (stmt) - 1; /* RHS only */
  for (i = 0; i < number_of_oprnds; i++)
    {
      oprnd = gimple_op (stmt, i + 1);

      if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
          || (!def_stmt && dt[i] != vect_constant_def))
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: can't find def for ");
              print_generic_expr (vect_dump, oprnd, TDF_SLIM);
            }
          return false;
        }

      /* Check if DEF_STMT is a part of a pattern and get the def stmt from
         the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)))
        {
          if (!*first_stmt_dt0)
            *pattern0 = true;
          else
            {
              if (i == 1 && !*first_stmt_dt1)
                *pattern1 = true;
              else if ((i == 0 && !*pattern0) || (i == 1 && !*pattern1))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    {
                      fprintf (vect_dump, "Build SLP failed: some of the stmts"
                               " are in a pattern, and others are not ");
                      print_generic_expr (vect_dump, oprnd, TDF_SLIM);
                    }
                  return false;
                }
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt[i] = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (*dt == vect_unknown_def_type)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "Unsupported pattern.");
              return false;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
              def = gimple_phi_result (def_stmt);
              break;

            case GIMPLE_ASSIGN:
              def = gimple_assign_lhs (def_stmt);
              break;

            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported defining stmt: ");
              return false;
            }
        }
      if (!*first_stmt_dt0)
        {
          /* op0 of the first stmt of the group - store its info.  */
          *first_stmt_dt0 = dt[i];
          if (def)
            *first_stmt_def0_type = TREE_TYPE (def);
          else
            *first_stmt_const_oprnd = oprnd;

          /* Analyze costs (for the first stmt of the group only).  */
          if (rhs_class != GIMPLE_SINGLE_RHS)
            /* Not a memory operation (we don't call this function for
               loads).  */
            vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
          else
            /* Store.  */
            vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
        }
      else
        {
          if (!*first_stmt_dt1 && i == 1)
            {
              /* op1 of the first stmt of the group - store its info.  */
              *first_stmt_dt1 = dt[i];
              if (def)
                *first_stmt_def1_type = TREE_TYPE (def);
              else
                {
                  /* We assume that the stmt contains only one constant
                     operand.  We fail otherwise, to be on the safe side.  */
                  if (*first_stmt_const_oprnd)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: two constant "
                                 "oprnds in stmt");
                      return false;
                    }
                  *first_stmt_const_oprnd = oprnd;
                }
            }
          else
            {
              /* Not the first stmt of the group; check that the def-stmt/s
                 match the def-stmt/s of the first stmt.  */
              if ((i == 0
                   && (*first_stmt_dt0 != dt[i]
                       || (*first_stmt_def0_type && def
                           && *first_stmt_def0_type != TREE_TYPE (def))))
                  || (i == 1
                      && (*first_stmt_dt1 != dt[i]
                          || (*first_stmt_def1_type && def
                              && *first_stmt_def1_type != TREE_TYPE (def))))
                  || (!def
                      && TREE_TYPE (*first_stmt_const_oprnd)
                         != TREE_TYPE (oprnd)))
                {
                  if (vect_print_dump_info (REPORT_SLP))
                    fprintf (vect_dump, "Build SLP failed: different types ");
                  return false;
                }
            }
        }

      /* Check the types of the definitions.  */
      switch (dt[i])
        {
        case vect_constant_def:
        case vect_invariant_def:
          break;

        case vect_loop_def:
          if (i == 0)
            VEC_safe_push (gimple, heap, *def_stmts0, def_stmt);
          else
            VEC_safe_push (gimple, heap, *def_stmts1, def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: illegal type of def ");
              print_generic_expr (vect_dump, def, TDF_SLIM);
            }
          return false;
        }
    }

  return true;
}
/* Recursively build an SLP tree starting from NODE.
   Fail (and return FALSE) if def-stmts are not isomorphic, require data
   permutation, or are of unsupported types of operation.  Otherwise,
   return TRUE.  */
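/* Illustration (not from the original sources): the strided store group

     a[4*i]     = b[4*i]     + 5;
     a[4*i + 1] = b[4*i + 1] + 5;
     a[4*i + 2] = b[4*i + 2] + 5;
     a[4*i + 3] = b[4*i + 3] + 5;

   is isomorphic - same PLUS_EXPR, same types, matching defs - so it can
   be packed into a single vector add.  A group mixing, say, PLUS_EXPR
   and MINUS_EXPR fails the first_stmt_code check below.  */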
static bool
vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
                     unsigned int group_size,
                     int *inside_cost, int *outside_cost,
                     int ncopies_for_cost, unsigned int *max_nunits)
{
  VEC (gimple, heap) *def_stmts0 = VEC_alloc (gimple, heap, group_size);
  VEC (gimple, heap) *def_stmts1 = VEC_alloc (gimple, heap, group_size);
  unsigned int i;
  VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
  gimple stmt = VEC_index (gimple, stmts, 0);
  enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
  enum tree_code first_stmt_code = 0, rhs_code;
  tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
  tree lhs;
  gimple prev_stmt = NULL;
  bool stop_recursion = false, need_same_oprnds = false;
  tree vectype, scalar_type, first_op1 = NULL_TREE;
  unsigned int vectorization_factor = 0, ncopies;
  optab optab;
  int icode;
  enum machine_mode optab_op2_mode;
  enum machine_mode vec_mode;
  tree first_stmt_const_oprnd = NULL_TREE;
  struct data_reference *first_dr;
  bool pattern0 = false, pattern1 = false;
  HOST_WIDE_INT dummy;
  /* For every stmt in NODE find its def stmt/s.  */
  for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP for ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump,
                       "Build SLP failed: not GIMPLE_ASSIGN nor GIMPLE_CALL");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (vect_print_dump_info (REPORT_SLP))
            {
              fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
              print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
            }
          return false;
        }

      gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
      vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
      if (ncopies > 1 && vect_print_dump_info (REPORT_SLP))
        fprintf (vect_dump, "SLP with multiple types ");

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
      if (is_gimple_call (stmt))
        rhs_code = CALL_EXPR;
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || (optab->handlers[(int) vec_mode].insn_code
                      == CODE_FOR_nothing))
                {
                  /* No vector/vector shift, try for a vector/scalar
                     shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump, "Build SLP failed: no optab.");
                      return false;
                    }
                  icode = (int) optab->handlers[(int) vec_mode].insn_code;
                  if (icode == CODE_FOR_nothing)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        fprintf (vect_dump,
                                 "Build SLP failed: op not supported by target.");
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different operation in stmt ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump,
                           "Build SLP failed: different shift arguments in ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }
        }
      /* Strided store or load.  */
      if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
                                                &def_stmts0, &def_stmts1,
                                                &first_stmt_dt0,
                                                &first_stmt_dt1,
                                                &first_stmt_def0_type,
                                                &first_stmt_def1_type,
                                                &first_stmt_const_oprnd,
                                                ncopies_for_cost,
                                                &pattern0, &pattern1))
                return false;
            }
          else
            {
              /* Load.  */
              if (i == 0)
                {
                  /* First stmt of the SLP group should be the first load of
                     the interleaving loop if data permutation is not allowed.
                     Check that there is no gap between the loads.  */
                  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt
                      || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 0)
                    {
                      /* FORNOW: data permutations and gaps in loads are
                         not supported.  */
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: strided "
                                   " loads need permutation or have gaps ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }
                      return false;
                    }

                  first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
                  if (vect_supportable_dr_alignment (first_dr)
                      == dr_unaligned_unsupported)
                    {
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: unsupported "
                                   " unaligned load ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }
                      return false;
                    }

                  /* Analyze costs (for the first stmt in the group).  */
                  vect_model_load_cost (vinfo_for_stmt (stmt),
                                        ncopies_for_cost, *node);
                }
              else
                {
                  /* Check that we have consecutive loads from the
                     interleaving chain and that there is no gap between
                     the loads.  */
                  if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt
                      || DR_GROUP_GAP (vinfo_for_stmt (stmt)) != 1)
                    {
                      /* FORNOW: data permutations and gaps in loads are
                         not supported.  */
                      if (vect_print_dump_info (REPORT_SLP))
                        {
                          fprintf (vect_dump, "Build SLP failed: strided "
                                   " loads need permutation or have gaps ");
                          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                        }
                      return false;
                    }
                }

              prev_stmt = stmt;

              /* We stop the tree when we reach a group of loads.  */
              stop_recursion = true;
              continue;
            }
        } /* Strided access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not a strided load.  */
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: not strided load ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }

              /* FORNOW: non-strided loads are not supported.  */
              return false;
            }

          /* Not a memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary)
            {
              if (vect_print_dump_info (REPORT_SLP))
                {
                  fprintf (vect_dump, "Build SLP failed: operation");
                  fprintf (vect_dump, " unsupported ");
                  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
                }
              return false;
            }

          /* Find the def-stmts.  */
          if (!vect_get_and_check_slp_defs (loop_vinfo, *node, stmt,
                                            &def_stmts0, &def_stmts1,
                                            &first_stmt_dt0, &first_stmt_dt1,
                                            &first_stmt_def0_type,
                                            &first_stmt_def1_type,
                                            &first_stmt_const_oprnd,
                                            ncopies_for_cost,
                                            &pattern0, &pattern1))
            return false;
        }
    }
  /* Add the costs of the node to the overall instance costs.  */
  *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
  *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);

  /* Strided loads were reached - stop the recursion.  */
  if (stop_recursion)
    return true;

  /* Create SLP_TREE nodes for the definition node/s.  */
  if (first_stmt_dt0 == vect_loop_def)
    {
      slp_tree left_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
      SLP_TREE_VEC_STMTS (left_node) = NULL;
      SLP_TREE_LEFT (left_node) = NULL;
      SLP_TREE_RIGHT (left_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
                                inside_cost, outside_cost,
                                ncopies_for_cost, max_nunits))
        return false;

      SLP_TREE_LEFT (*node) = left_node;
    }

  if (first_stmt_dt1 == vect_loop_def)
    {
      slp_tree right_node = XNEW (struct _slp_tree);
      SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
      SLP_TREE_VEC_STMTS (right_node) = NULL;
      SLP_TREE_LEFT (right_node) = NULL;
      SLP_TREE_RIGHT (right_node) = NULL;
      SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
      SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
      if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
                                inside_cost, outside_cost,
                                ncopies_for_cost, max_nunits))
        return false;

      SLP_TREE_RIGHT (*node) = right_node;
    }

  return true;
}


/* Print the SLP tree rooted at NODE to the vectorizer dump file.  */

static void
vect_print_slp_tree (slp_tree node)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  fprintf (vect_dump, "node ");
  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    {
      fprintf (vect_dump, "\n\tstmt %d ", i);
      print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
    }
  fprintf (vect_dump, "\n");

  vect_print_slp_tree (SLP_TREE_LEFT (node));
  vect_print_slp_tree (SLP_TREE_RIGHT (node));
}
/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at
   index J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates
   that all the stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple stmt;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
  vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
}
/* Analyze an SLP instance starting from a group of strided stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (loop_vec_info loop_vinfo, gimple stmt)
{
  slp_instance new_instance;
  slp_tree node = XNEW (struct _slp_tree);
  unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type;
  gimple next;
  unsigned int vectorization_factor = 0, ncopies;
  bool slp_impossible = false;
  int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
  unsigned int max_nunits = 0;

  scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
  vectype = get_vectype_for_scalar_type (scalar_type);
  if (!vectype)
    {
      if (vect_print_dump_info (REPORT_SLP))
        {
          fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
          print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
        }
      return false;
    }

  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  ncopies = vectorization_factor / nunits;

  /* Create a node (a root of the SLP tree) for the packed strided
     stores.  */
  SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (gimple, heap, group_size);
  next = stmt;
  /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
  while (next)
    {
      VEC_safe_push (gimple, heap, SLP_TREE_SCALAR_STMTS (node), next);
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
    }

  SLP_TREE_VEC_STMTS (node) = NULL;
  SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
  SLP_TREE_LEFT (node) = NULL;
  SLP_TREE_RIGHT (node) = NULL;
  SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
  SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;

  /* Calculate the unrolling factor.  */
  unrolling_factor = least_common_multiple (nunits, group_size) / group_size;

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  ncopies_for_cost = unrolling_factor * group_size / nunits;
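  /* Worked example (illustrative, not from the original sources): with
     V4SI (nunits = 4):
       group_size = 8: unrolling_factor = lcm (4, 8) / 8 = 1, and
                       ncopies_for_cost = 1 * 8 / 4 = 2 vector stmts;
       group_size = 2: unrolling_factor = lcm (4, 2) / 2 = 2, i.e. two
                       group instances are needed to fill one vector.  */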
  /* Build the tree for the SLP instance.  */
  if (vect_build_slp_tree (loop_vinfo, &node, group_size, &inside_cost,
                           &outside_cost, ncopies_for_cost, &max_nunits))
    {
      /* Create a new SLP instance.  */
      new_instance = XNEW (struct _slp_instance);
      SLP_INSTANCE_TREE (new_instance) = node;
      SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
      /* Calculate the unrolling factor based on the smallest type.  */
      if (max_nunits > nunits)
        unrolling_factor = least_common_multiple (max_nunits, group_size)
                           / group_size;

      SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
      SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
      SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
      VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
                     new_instance);
      if (vect_print_dump_info (REPORT_SLP))
        vect_print_slp_tree (node);

      return true;
    }

  /* Failed to SLP.  */
  /* Free the allocated memory.  */
  vect_free_slp_tree (node);

  /* SLP failed for this instance, but it is still possible to SLP other
     stmts in the loop.  */
  return false;
}
/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (gimple, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
  gimple store;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_analyze_slp ===");

  for (i = 0; VEC_iterate (gimple, strided_stores, i, store); i++)
    if (!vect_analyze_slp_instance (loop_vinfo, store))
      {
        /* SLP failed.  No instance can be SLPed in the loop.  */
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
          fprintf (vect_dump, "SLP failed.");
        return false;
      }

  return true;
}
/* For each possible SLP instance decide whether to SLP it and calculate
   the overall unrolling factor needed to SLP the loop.  */

void
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_make_slp_decision ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
        unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.
         Later we call vect_detect_hybrid_slp () to find stmts that need
         hybrid SLP and loop-based vectorization.  Such stmts will be
         marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
             decided_to_slp, unrolling_factor);
}
/* Find stmts that must be both vectorized and SLPed (since they feed stmts
   that can't be SLPed) in the tree rooted at NODE.  Mark such stmts as
   HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node)
{
  int i;
  gimple stmt;
  imm_use_iterator imm_iter;
  gimple use_stmt;

  if (!node)
    return;

  for (i = 0; VEC_iterate (gimple, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
    if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
        && TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
        if (vinfo_for_stmt (use_stmt)
            && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt))
            && STMT_VINFO_RELEVANT (vinfo_for_stmt (use_stmt)))
          vect_mark_slp_stmts (node, hybrid, i);

  vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
  vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
}
/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (vect_print_dump_info (REPORT_SLP))
    fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");

  for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
    vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
}
/* Function vect_analyze_data_refs.

   Find all the data references in the loop.

   The general structure of the analysis of data refs in the vectorizer is
   as follows:
   1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop
      to find and analyze all data-refs in the loop and their dependences.
   2- vect_analyze_dependences(): apply dependence testing using ddrs.
   3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
   4- vect_analyze_drs_access(): check that ref_stmt.step is ok.  */

bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  tree scalar_type;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");

  compute_data_dependences_for_loop (loop, true,
                                     &LOOP_VINFO_DATAREFS (loop_vinfo),
                                     &LOOP_VINFO_DDRS (loop_vinfo));

  /* Go through the data-refs, check that the analysis succeeded.  Update
     the pointer from the stmt_vec_info struct to DR and vectype.  */
  datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      gimple stmt;
      stmt_vec_info stmt_info;
      basic_block bb;
      tree base, offset, init;

      if (!dr || !DR_REF (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: unhandled data-ref ");
          return false;
        }
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
          || !DR_STEP (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: data ref analysis failed ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }
          return false;
        }

      if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            fprintf (vect_dump, "not vectorized: base addr of dr is a "
                     "constant");
          return false;
        }

      if (!DR_SYMBOL_TAG (dr))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump, "not vectorized: no memory tag for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      base = unshare_expr (DR_BASE_ADDRESS (dr));
      offset = unshare_expr (DR_OFFSET (dr));
      init = unshare_expr (DR_INIT (dr));

      /* Update the DR field in the stmt_vec_info struct.  */
      bb = gimple_bb (stmt);

      /* If the dataref is in an inner-loop of the loop that is considered
         for vectorization, we also want to analyze the access relative to
         the outer-loop (DR contains information only relative to the
         inner-most enclosing loop).  We do that by building a reference to
         the first location accessed by the inner-loop, and analyzing it
         relative to the outer-loop.  */
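      /* Illustration (not from the original sources): in

           for (i = 0; i < N; i++)       // outer loop
             for (j = 0; j < M; j++)     // inner loop
               ... = a[i][j];            // int a[N][M]

         DR describes the access relative to the inner loop (step 4).
         Relative to the outer loop the first location accessed is
         a[i][0], and the step is one row, 4*M bytes per outer iteration,
         which is what the code below computes into
         STMT_VINFO_DR_STEP.  */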
      if (nested_in_vect_loop_p (loop, stmt))
        {
          tree outer_step, outer_base, outer_init;
          HOST_WIDE_INT pbitsize, pbitpos;
          tree poffset;
          enum machine_mode pmode;
          int punsignedp, pvolatilep;
          affine_iv base_iv, offset_iv;
          tree dinit;

          /* Build a reference to the first location accessed by the
             inner-loop: *(BASE+INIT).  (The first location is actually
             BASE+INIT+OFFSET, but we add OFFSET separately later).  */
          tree inner_base = build_fold_indirect_ref
                            (fold_build2 (POINTER_PLUS_EXPR,
                                          TREE_TYPE (base), base,
                                          fold_convert (sizetype, init)));
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (dump_file, "analyze in outer-loop: ");
              print_generic_expr (dump_file, inner_base, TDF_SLIM);
            }

          outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
                                            &poffset, &pmode, &punsignedp,
                                            &pvolatilep, false);
          gcc_assert (outer_base != NULL_TREE);

          if (pbitpos % BITS_PER_UNIT != 0)
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (dump_file, "failed: bit offset alignment.\n");
              return false;
            }

          outer_base = build_fold_addr_expr (outer_base);
          if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (dump_file,
                         "failed: evolution of base is not affine.\n");
              return false;
            }

          if (offset)
            {
              if (poffset)
                poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
                                       poffset);
              else
                poffset = offset;
            }

          if (!poffset)
            {
              offset_iv.base = ssize_int (0);
              offset_iv.step = ssize_int (0);
            }
          else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (dump_file, "evolution of offset is not affine.\n");
              return false;
            }

          outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
          split_constant_offset (base_iv.base, &base_iv.base, &dinit);
          outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
          split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
          outer_init = size_binop (PLUS_EXPR, outer_init, dinit);

          outer_step = size_binop (PLUS_EXPR,
                                   fold_convert (ssizetype, base_iv.step),
                                   fold_convert (ssizetype, offset_iv.step));
          STMT_VINFO_DR_STEP (stmt_info) = outer_step;
          /* FIXME: Use canonicalize_base_object_address (base_iv.base);  */
          STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
          STMT_VINFO_DR_INIT (stmt_info) = outer_init;
          STMT_VINFO_DR_OFFSET (stmt_info) =
            fold_convert (ssizetype, offset_iv.base);
          STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
            size_int (highest_pow2_factor (offset_iv.base));

          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "\touter base_address: ");
              print_generic_expr (dump_file,
                                  STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
                                  TDF_SLIM);
              fprintf (dump_file, "\n\touter offset from base address: ");
              print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info),
                                  TDF_SLIM);
              fprintf (dump_file,
                       "\n\touter constant offset from base address: ");
              print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info),
                                  TDF_SLIM);
              fprintf (dump_file, "\n\touter step: ");
              print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info),
                                  TDF_SLIM);
              fprintf (dump_file, "\n\touter aligned to: ");
              print_generic_expr (dump_file,
                                  STMT_VINFO_DR_ALIGNED_TO (stmt_info),
                                  TDF_SLIM);
            }
        }
      if (STMT_VINFO_DATA_REF (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: more than one data ref in stmt: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }
          return false;
        }

      STMT_VINFO_DATA_REF (stmt_info) = dr;

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info) =
        get_vectype_for_scalar_type (scalar_type);
      if (!STMT_VINFO_VECTYPE (stmt_info))
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
            {
              fprintf (vect_dump,
                       "not vectorized: no vectype for stmt: ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
              fprintf (vect_dump, " scalar_type: ");
              print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
            }
          return false;
        }
    }

  return true;
}
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */

/* Function vect_mark_relevant.

   Mark STMT as "relevant for vectorization" and add it to WORKLIST.  */

static void
vect_mark_relevant (VEC(gimple,heap) **worklist, gimple stmt,
                    enum vect_relevant relevant, bool live_p)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
  bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);

  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple pattern_stmt;

      /* This is the last stmt in a sequence that was detected as a
         pattern that can potentially be vectorized.  Don't mark the stmt
         as relevant/live because it's not going to be vectorized.
         Instead mark the pattern-stmt that replaces it.  */

      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
      stmt_info = vinfo_for_stmt (pattern_stmt);
      gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
      save_relevant = STMT_VINFO_RELEVANT (stmt_info);
      save_live_p = STMT_VINFO_LIVE_P (stmt_info);
      stmt = pattern_stmt;
    }

  STMT_VINFO_LIVE_P (stmt_info) |= live_p;
  if (relevant > STMT_VINFO_RELEVANT (stmt_info))
    STMT_VINFO_RELEVANT (stmt_info) = relevant;

  if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
      && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "already marked relevant/live.");
      return;
    }

  VEC_safe_push (gimple, heap, *worklist, stmt);
}
/* Function vect_stmt_relevant_p.

   Return true if STMT in the loop that is represented by LOOP_VINFO is
   "relevant for vectorization".

   A stmt is considered "relevant for vectorization" if:
   - it has uses outside the loop.
   - it has vdefs (it alters memory).
   - it is a control stmt in the loop (other than the exit condition).

   CHECKME: what other side effects would the vectorizer allow?  */
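/* Illustration (not from the original sources): in

     for (i = 0; i < N; i++)
       {
         a[i] = b[i] + 1;   // alters memory (vdef)     -> relevant
         s = s + b[i];      // s is used after the loop -> live
       }

   both stmts are marked by this function; stmts that only feed loop
   control or address computation are not marked here - they are reached
   later through the use-chains walked by process_use, and only if
   something relevant depends on them.  */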
static bool
vect_stmt_relevant_p (gimple stmt, loop_vec_info loop_vinfo,
                      enum vect_relevant *relevant, bool *live_p)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  ssa_op_iter op_iter;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  def_operand_p def_p;

  *relevant = vect_unused_in_loop;
  *live_p = false;

  /* cond stmt other than loop exit cond.  */
  if (is_ctrl_stmt (stmt)
      && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
    *relevant = vect_used_in_loop;

  /* changing memory.  */
  if (gimple_code (stmt) != GIMPLE_PHI)
    if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
      {
        if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
        *relevant = vect_used_in_loop;
      }

  /* uses outside the loop.  */
  FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
    {
      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
        {
          basic_block bb = gimple_bb (USE_STMT (use_p));
          if (!flow_bb_inside_loop_p (loop, bb))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");

              /* We expect all such uses to be in the loop exit phis
                 (because of loop-closed form).  */
              gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI);
              gcc_assert (bb == single_exit (loop)->dest);

              *live_p = true;
            }
        }
    }

  return (*live_p || *relevant);
}
/* Function process_use.

   Inputs:
   - a USE in STMT in a loop represented by LOOP_VINFO
   - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
     that defined USE.  This is done by calling mark_relevant and passing it
     the WORKLIST (to add DEF_STMT to the WORKLIST in case it is relevant).

   Outputs:
   Generally, LIVE_P and RELEVANT are used to define the liveness and
   relevance info of the DEF_STMT of this USE:
     STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
     STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
   Exceptions:
   - case 1: If USE is used only for address computations (e.g. array
     indexing), which does not need to be directly vectorized, then the
     liveness/relevance of the respective DEF_STMT is left unchanged.
   - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt,
     we skip DEF_STMT because it has already been processed.
   - case 3: If DEF_STMT and STMT are in different nests, then "relevant"
     will be modified accordingly.

   Return true if everything is as expected.  Return false otherwise.  */
static bool
process_use (gimple stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
             enum vect_relevant relevant, VEC(gimple,heap) **worklist)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
  stmt_vec_info dstmt_vinfo;
  basic_block bb, def_bb;
  tree def;
  gimple def_stmt;
  enum vect_def_type dt;

  /* case 1: we are only interested in uses that need to be vectorized.
     Uses that are used for address computation are not considered
     relevant.  */
  if (!exist_non_indexing_operands_for_use_p (use, stmt))
    return true;

  if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
        fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
      return false;
    }

  if (!def_stmt || gimple_nop_p (def_stmt))
    return true;

  def_bb = gimple_bb (def_stmt);
  if (!flow_bb_inside_loop_p (loop, def_bb))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "def_stmt is out of loop.");
      return true;
    }

  /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
     DEF_STMT must have already been processed, because this should be the
     only way that STMT, which is a reduction-phi, was put in the worklist,
     as there should be no other uses for DEF_STMT in the loop.  So we just
     check that everything is as expected, and we are done.  */
  dstmt_vinfo = vinfo_for_stmt (def_stmt);
  bb = gimple_bb (stmt);
  if (gimple_code (stmt) == GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
      && gimple_code (def_stmt) != GIMPLE_PHI
      && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
      && bb->loop_father == def_bb->loop_father)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
      if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
        dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
      gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
      gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
                  || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
      return true;
    }
  /* case 3a: outer-loop stmt defining an inner-loop stmt:
        outer-loop-header-bb:
                d = def_stmt
        inner-loop:
                stmt # use (d)
        outer-loop-tail-bb:
                ...                       */
  if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
      switch (relevant)
        {
        case vect_unused_in_loop:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
                     vect_used_by_reduction : vect_unused_in_loop;
          break;
        case vect_used_in_outer_by_reduction:
          relevant = vect_used_by_reduction;
          break;
        case vect_used_in_outer:
          relevant = vect_used_in_loop;
          break;
        case vect_used_by_reduction:
        case vect_used_in_loop:
          break;

        default:
          gcc_unreachable ();
        }
    }

  /* case 3b: inner-loop stmt defining an outer-loop stmt:
        outer-loop-header-bb:
                ...
        inner-loop:
                d = def_stmt
        outer-loop-tail-bb:
                stmt # use (d)            */
  else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
      switch (relevant)
        {
        case vect_unused_in_loop:
          relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
                     vect_used_in_outer_by_reduction : vect_unused_in_loop;
          break;

        case vect_used_in_outer_by_reduction:
        case vect_used_in_outer:
          break;

        case vect_used_by_reduction:
          relevant = vect_used_in_outer_by_reduction;
          break;

        case vect_used_in_loop:
          relevant = vect_used_in_outer;
          break;

        default:
          gcc_unreachable ();
        }
    }

  vect_mark_relevant (worklist, def_stmt, relevant, live_p);
  return true;
}
/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmt 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  VEC(gimple,heap) *worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  gimple stmt;
  unsigned int i;
  stmt_vec_info stmt_vinfo;
  basic_block bb;
  gimple phi;
  bool live_p;
  enum vect_relevant relevant;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");

  worklist = VEC_alloc (gimple, heap, 64);

  /* 1. Init worklist.  */
  for (i = 0; i < nbbs; i++)
    {
      bb = bbs[i];
      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        {
          phi = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: phi relevant? ");
              print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, phi, relevant, live_p);
        }
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
        {
          stmt = gsi_stmt (si);
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "init: stmt relevant? ");
              print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
            }

          if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
            vect_mark_relevant (&worklist, stmt, relevant, live_p);
        }
    }
  /* 2. Process the worklist.  */
  while (VEC_length (gimple, worklist) > 0)
    {
      use_operand_p use_p;
      ssa_op_iter iter;

      stmt = VEC_pop (gimple, worklist);
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "worklist: examine stmt: ");
          print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
        }

      /* Examine the USEs of STMT.  For each USE, mark the stmt that
         defines it (DEF_STMT) as relevant/irrelevant and live/dead
         according to the liveness and relevance properties of STMT.  */
      stmt_vinfo = vinfo_for_stmt (stmt);
      relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
      live_p = STMT_VINFO_LIVE_P (stmt_vinfo);

      /* Generally, the liveness and relevance properties of STMT are
         propagated as is to the DEF_STMTs of its USEs:
           live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
           relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)

         One exception is when STMT has been identified as defining a
         reduction variable; in this case we set the liveness/relevance
         as follows:
           live_p = false
           relevant = vect_used_by_reduction
         This is because we distinguish between two kinds of relevant
         stmts - those that are used by a reduction computation, and those
         that are (also) used by a regular computation.  This allows us
         later on to identify stmts that are used solely by a reduction,
         and therefore the order of the results that they produce does not
         have to be kept.

         Reduction phis are expected to be used by a reduction stmt, or by
         a stmt in an outer loop; other reduction stmts are expected to be
         in the loop, and possibly used by a stmt in an outer loop.
         Here are the expected values of "relevant" for reduction
         phis/stmts:

         relevance:                            phi    stmt
         vect_unused_in_loop                          ok
         vect_used_in_outer_by_reduction       ok     ok
         vect_used_in_outer                    ok     ok
         vect_used_by_reduction                ok
         vect_used_in_loop                                   */
      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
        {
          enum vect_relevant tmp_relevant = relevant;
          switch (tmp_relevant)
            {
            case vect_unused_in_loop:
              gcc_assert (gimple_code (stmt) != GIMPLE_PHI);
              relevant = vect_used_by_reduction;
              break;

            case vect_used_in_outer_by_reduction:
            case vect_used_in_outer:
              gcc_assert (gimple_code (stmt) != WIDEN_SUM_EXPR
                          && gimple_code (stmt) != DOT_PROD_EXPR);
              break;

            case vect_used_by_reduction:
              if (gimple_code (stmt) == GIMPLE_PHI)
                break;
              /* fall through */
            case vect_used_in_loop:
            default:
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "unsupported use of reduction.");
              VEC_free (gimple, heap, worklist);
              return false;
            }
          live_p = false;
        }

      FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
        {
          tree op = USE_FROM_PTR (use_p);
          if (!process_use (stmt, op, loop_vinfo, live_p, relevant,
                            &worklist))
            {
              VEC_free (gimple, heap, worklist);
              return false;
            }
        }
    } /* while worklist */

  VEC_free (gimple, heap, worklist);
  return true;
}
/* Function vect_can_advance_ivs_p

   In case the number of iterations that LOOP iterates is unknown at
   compile time, an epilog loop will be generated, and the loop induction
   variables (IVs) will be "advanced" to the value they are supposed to
   take just before the epilog loop.  Here we check that the access
   function of the loop IVs and the expression that represents the loop
   bound are simple enough.  These restrictions will be relaxed in the
   future.  */
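/* Illustration (not from the original sources): for a simple IV such as
   i in "for (i = 0; i < n; i++)" the access function is the affine
   chrec {0, +, 1}_loop; its evolution part is the constant 1, so the IV
   can be advanced for the epilogue simply as i' = i + niters * 1.  A
   second-degree chrec such as {0, +, {1, +, 1}_loop}_loop is rejected
   by the tree_is_chrec check below.  */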

static bool
vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block bb = loop->header;
  gimple phi;
  gimple_stmt_iterator gsi;

  /* Analyze phi functions of the loop header.  */

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_can_advance_ivs_p:");

  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      phi = gsi_stmt (gsi);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "Analyze phi: ");
	  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
	}

      /* Skip virtual phi's.  The data dependences that are associated with
	 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */

      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "reduc phi. skip.");
	  continue;
	}

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
	(loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "No Access function.");
	  return false;
	}

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "Access function of PHI: ");
	  print_generic_expr (vect_dump, access_fn, TDF_SLIM);
	}

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "No evolution.");
	  return false;
	}

      /* FORNOW: We do not transform initial conditions of IVs
	 whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
	return false;
    }

  return true;
}

/* Function vect_get_loop_niters.

   Determine the number of iterations the loop executes.  If an expression
   that represents the number of iterations can be constructed, place it
   in NUMBER_OF_ITERATIONS.  Return the loop exit condition.  */
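
/* For illustration: for a loop in do-while form whose body executes N
   times, the exit condition is also executed N times, so
   NUMBER_OF_ITERATIONS is set to an expression equivalent to N (an SSA
   name for a symbolic bound, or an INTEGER_CST for a constant one).  */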

static gimple
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== get_loop_niters ===");

  niters = number_of_exit_cond_executions (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "==> get_loop_niters:" );
	  print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
	}
    }

  return get_loop_exit_condition (loop);
}

/* Function vect_analyze_loop_1.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  This is a subset of the analyses applied in
   vect_analyze_loop, to be applied on an inner-loop nested in the loop
   that is now considered for (outer-loop) vectorization.  */

static loop_vec_info
vect_analyze_loop_1 (struct loop *loop)
{
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest_1 =====");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad inner-loop form.");
      return NULL;
    }

  return loop_vinfo;
}

/* Function vect_analyze_loop_form.

   Verify that certain CFG restrictions hold, including:
   - the loop has a pre-header
   - the loop has a single entry and exit
   - the loop exit condition is simple enough, and the number of iterations
     can be analyzed (a countable loop).  */
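
/* For illustration: a countable loop such as

       for (i = 0; i < n; i++)
	 a[i] = b[i] + c[i];

   has an analyzable iteration count ('n'), whereas a loop like

       while (*p++ != 0)
	 count++;

   exits on a data-dependent condition, so its number of iterations
   cannot be computed and it is rejected here.  */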

loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  gimple loop_cond;
  tree number_of_iterations = NULL;
  loop_vec_info inner_loop_vinfo = NULL;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_loop_form ===");

  /* Different restrictions apply when we are considering an inner-most loop,
     vs. an outer (nested) loop.
     (FORNOW. May want to relax some of these restrictions in the future).  */

  if (!loop->inner)
    {
      /* Inner-most loop.  We currently require that the number of BBs is
	 exactly 2 (the header and latch).  Vectorizable inner-most loops
	 look like this:

			(pre-header)
			   |
			  header <--------+
			   | |            |
			   | +--> latch --+
			   |
			(exit-bb)  */

      if (loop->num_nodes != 2)
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: too many BBs in loop.");
	  return NULL;
	}

      if (empty_block_p (loop->header))
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: empty loop.");
	  return NULL;
	}
    }
  else
    {
      struct loop *innerloop = loop->inner;
      edge backedge, entryedge;

      /* Nested loop.  We currently require that the loop is doubly-nested,
	 contains a single inner loop, and the number of BBs is exactly 5.
	 Vectorizable outer-loops look like this:

			(pre-header)
			   |
			  header <---+
			   |         |
			  inner-loop |
			   |         |
			  tail ------+
			   |
			(exit-bb)

	 The inner-loop has the properties expected of inner-most loops
	 as described above.  */

      if ((loop->inner)->inner || (loop->inner)->next)
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: multiple nested loops.");
	  return NULL;
	}

      /* Analyze the inner-loop.  */
      inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
      if (!inner_loop_vinfo)
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: Bad inner loop.");
	  return NULL;
	}

      if (!expr_invariant_in_loop_p (loop,
				     LOOP_VINFO_NITERS (inner_loop_vinfo)))
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump,
		     "not vectorized: inner-loop count not invariant.");
	  destroy_loop_vec_info (inner_loop_vinfo, true);
	  return NULL;
	}

      if (loop->num_nodes != 5)
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: too many BBs in loop.");
	  destroy_loop_vec_info (inner_loop_vinfo, true);
	  return NULL;
	}

      gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
      backedge = EDGE_PRED (innerloop->header, 1);
      entryedge = EDGE_PRED (innerloop->header, 0);
      if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
	{
	  backedge = EDGE_PRED (innerloop->header, 0);
	  entryedge = EDGE_PRED (innerloop->header, 1);
	}

      if (entryedge->src != loop->header
	  || !single_exit (innerloop)
	  || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
	  destroy_loop_vec_info (inner_loop_vinfo, true);
	  return NULL;
	}

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "Considering outer-loop vectorization.");
    }

  if (!single_exit (loop)
      || EDGE_COUNT (loop->header->preds) != 2)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	{
	  if (!single_exit (loop))
	    fprintf (vect_dump, "not vectorized: multiple exits.");
	  else if (EDGE_COUNT (loop->header->preds) != 2)
	    fprintf (vect_dump, "not vectorized: too many incoming edges.");
	}
      if (inner_loop_vinfo)
	destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop, i.e.,
     that the loop is represented as a do-while (with a proper if-guard
     before the loop if needed), where the loop header contains all the
     executable statements, and the latch is empty.  */
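
  /* For illustration: the source loop

	 for (i = 0; i < n; i++)
	   a[i] = 0;

     is expected here in guarded do-while form:

	 if (n > 0)
	   {
	     i = 0;
	     do
	       {
		 a[i] = 0;
		 i++;
	       }
	     while (i < n);
	   }

     with all executable statements in the header and an empty latch.  */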
  if (!empty_block_p (loop->latch)
      || phi_nodes (loop->latch))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump, "not vectorized: unexpected loop form.");
      if (inner_loop_vinfo)
	destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  /* Make sure there exists a single-predecessor exit bb:  */
  if (!single_pred_p (single_exit (loop)->dest))
    {
      edge e = single_exit (loop);
      if (!(e->flags & EDGE_ABNORMAL))
	{
	  split_loop_exit_edge (e);
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "split exit edge.");
	}
      else
	{
	  if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	    fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
	  if (inner_loop_vinfo)
	    destroy_loop_vec_info (inner_loop_vinfo, true);
	  return NULL;
	}
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump, "not vectorized: complicated exit condition.");
      if (inner_loop_vinfo)
	destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump,
		 "not vectorized: number of iterations cannot be computed.");
      if (inner_loop_vinfo)
	destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
	fprintf (vect_dump, "Infinite number of iterations.");
      if (inner_loop_vinfo)
	destroy_loop_vec_info (inner_loop_vinfo, true);
      return NULL;
    }

  if (!NITERS_KNOWN_P (number_of_iterations))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "Symbolic number of iterations is ");
	  print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
	}
    }
  else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
	fprintf (vect_dump, "not vectorized: number of iterations = 0.");
      if (inner_loop_vinfo)
	destroy_loop_vec_info (inner_loop_vinfo, false);
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
  LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;

  STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;

  /* CHECKME: May want to keep it around in the future.  */
  if (inner_loop_vinfo)
    destroy_loop_vec_info (inner_loop_vinfo, false);

  gcc_assert (!loop->aux);
  loop->aux = loop_vinfo;
  return loop_vinfo;
}

/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "===== analyze_loop_nest =====");

  if (loop_outer (loop)
      && loop_vec_info_for_loop (loop_outer (loop))
      && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "outer-loop already vectorized.");
      return NULL;
    }

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to vdefs/vuses)
     and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data references.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Classify all cross-iteration scalar data-flow cycles.
     Cross-iteration cycles caused by virtual phis are analyzed separately.  */
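
  /* For illustration: the cycles classified here include inductions,
     e.g. 'i_next = i + 1' carried by a loop-header phi, and reductions,
     e.g. 'sum_next = sum + a[i]'; each such phi is given a corresponding
     vect_induction_def / vect_reduction_def classification.  */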

  vect_analyze_scalar_cycles (loop_vinfo);

  vect_pattern_recog (loop_vinfo);

  /* Data-flow analysis to detect stmts that do not need to be vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     Fail if a data reference is found that cannot be vectorized.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  ok = vect_determine_vectorization_factor (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "can't determine vectorization factor.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */
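
  /* For illustration: a cross-iteration dependence such as

	 for (i = 0; i < n; i++)
	   a[i+1] = a[i] + b[i];

     (a flow dependence of distance 1, smaller than the vectorization
     factor) makes the loop non-vectorizable and is rejected here.  */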

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop (consecutive,
     complex, etc.).  FORNOW: Only handle consecutive access pattern.  */
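
  /* For illustration: 'a[i]' is a consecutive access, advancing by one
     element per iteration; an indirect access such as 'a[b[i]]' has no
     such regular pattern and causes this analysis to fail.  */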

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data access.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Prune the list of ddrs to be tested at run-time by versioning for alias.
     It is important to call pruning after vect_analyze_data_ref_accesses,
     since we use grouping information gathered by interleaving analysis.  */
  ok = vect_prune_runtime_alias_test_list (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "too long list of versioning for alias "
			    "run-time tests.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Check the SLP opportunities in the loop, analyze and build SLP trees.  */
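
  /* For illustration: SLP (Superword Level Parallelism) targets groups
     of isomorphic stmts within a single iteration, e.g.:

	 for (i = 0; i < n; i++)
	   {
	     a[2*i] = x;
	     a[2*i+1] = y;
	   }

     where the two interleaved stores can be combined into one vector
     store.  */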

  ok = vect_analyze_slp (loop_vinfo);
  if (ok)
    {
      /* Decide which possible SLP instances to SLP.  */
      vect_make_slp_decision (loop_vinfo);

      /* Find stmts that need to be both vectorized and SLPed.  */
      vect_detect_hybrid_slp (loop_vinfo);
    }

  /* This pass will decide on using loop versioning and/or loop peeling in
     order to enhance the alignment of data references in the loop.  */
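
  /* For illustration: with VF=4, a data-ref whose first access is
     misaligned by 2 elements becomes aligned after peeling
     (4 - 2) % 4 = 2 scalar iterations; alternatively the loop can be
     versioned, with a run-time test selecting between an aligned
     vectorized copy and the original loop.  */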

  ok = vect_enhance_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo, true);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}