1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
44 /* Main analysis functions. */
45 static loop_vec_info vect_analyze_loop_form (struct loop *);
46 static bool vect_analyze_data_refs (loop_vec_info);
47 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
48 static void vect_analyze_scalar_cycles (loop_vec_info);
49 static bool vect_analyze_data_ref_accesses (loop_vec_info);
50 static bool vect_analyze_data_ref_dependences (loop_vec_info);
51 static bool vect_analyze_data_refs_alignment (loop_vec_info);
52 static bool vect_compute_data_refs_alignment (loop_vec_info);
53 static bool vect_enhance_data_refs_alignment (loop_vec_info);
54 static bool vect_analyze_operations (loop_vec_info);
55 static bool vect_determine_vectorization_factor (loop_vec_info);
57 /* Utility functions for the analyses. */
58 static bool exist_non_indexing_operands_for_use_p (tree, tree);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static bool vect_can_advance_ivs_p (loop_vec_info);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference *, struct data_reference *, int npeel);
68 /* Function vect_determine_vectorization_factor
70 Determine the vectorization factor (VF). VF is the number of data elements
71 that are operated upon in parallel in a single iteration of the vectorized
72 loop. For example, when vectorizing a loop that operates on 4byte elements,
73 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
74 elements can fit in a single vector register.
76 We currently support vectorization of loops in which all types operated upon
77 are of the same size. Therefore this function currently sets VF according to
78 the size of the types operated upon, and fails if there are multiple sizes
81 VF is also the factor by which the loop iterations are strip-mined, e.g.:
88 for (i=0; i<N; i+=VF){
89 a[i:VF] = b[i:VF] + c[i:VF];
94 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
97 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
98 int nbbs = loop->num_nodes;
99 block_stmt_iterator si;
100 unsigned int vectorization_factor = 0;
105 stmt_vec_info stmt_info;
108 if (vect_print_dump_info (REPORT_DETAILS))
109 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
111 for (i = 0; i < nbbs; i++)
113 basic_block bb = bbs[i];
115 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
117 stmt_info = vinfo_for_stmt (phi);
118 if (vect_print_dump_info (REPORT_DETAILS))
120 fprintf (vect_dump, "==> examining phi: ");
121 print_generic_expr (vect_dump, phi, TDF_SLIM);
124 gcc_assert (stmt_info);
126 if (STMT_VINFO_RELEVANT_P (stmt_info))
128 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
129 scalar_type = TREE_TYPE (PHI_RESULT (phi));
131 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "get vectype for scalar type: ");
134 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
137 vectype = get_vectype_for_scalar_type (scalar_type);
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
143 "not vectorized: unsupported data-type ");
144 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
148 STMT_VINFO_VECTYPE (stmt_info) = vectype;
150 if (vect_print_dump_info (REPORT_DETAILS))
152 fprintf (vect_dump, "vectype: ");
153 print_generic_expr (vect_dump, vectype, TDF_SLIM);
156 nunits = TYPE_VECTOR_SUBPARTS (vectype);
157 if (vect_print_dump_info (REPORT_DETAILS))
158 fprintf (vect_dump, "nunits = %d", nunits);
160 if (!vectorization_factor
161 || (nunits > vectorization_factor))
162 vectorization_factor = nunits;
166 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
168 tree stmt = bsi_stmt (si);
169 stmt_info = vinfo_for_stmt (stmt);
171 if (vect_print_dump_info (REPORT_DETAILS))
173 fprintf (vect_dump, "==> examining statement: ");
174 print_generic_expr (vect_dump, stmt, TDF_SLIM);
177 gcc_assert (stmt_info);
179 /* skip stmts which do not need to be vectorized. */
180 if (!STMT_VINFO_RELEVANT_P (stmt_info)
181 && !STMT_VINFO_LIVE_P (stmt_info))
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "skip.");
188 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
190 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
192 fprintf (vect_dump, "not vectorized: irregular stmt.");
193 print_generic_expr (vect_dump, stmt, TDF_SLIM);
198 if (!GIMPLE_STMT_P (stmt)
199 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
201 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
203 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
204 print_generic_expr (vect_dump, stmt, TDF_SLIM);
209 if (STMT_VINFO_VECTYPE (stmt_info))
211 /* The only case when a vectype had been already set is for stmts
212 that contain a dataref, or for "pattern-stmts" (stmts generated
213 by the vectorizer to represent/replace a certain idiom). */
214 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
215 || is_pattern_stmt_p (stmt_info));
216 vectype = STMT_VINFO_VECTYPE (stmt_info);
222 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
223 && !is_pattern_stmt_p (stmt_info));
225 /* We generally set the vectype according to the type of the
227 For stmts whose result-type is different than the type of the
228 arguments (e.g. demotion, promotion), vectype will be reset
229 appropriately (later). Note that we have to visit the smallest
230 datatype in this function, because that determines the VF.
231 If the smallest datatype in the loop is present only as the
232 rhs of a promotion operation - we'd miss it here.
233 Such a case, where a variable of this datatype does not appear
234 in the lhs anywhere in the loop, can only occur if it's an
235 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
236 to have been optimized away by invariant motion. However, we
237 cannot rely on invariant motion to always take invariants out
238 of the loop, and so in the case of promotion we also have to
240 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
242 operation = GIMPLE_STMT_OPERAND (stmt, 1);
243 if (TREE_CODE (operation) == NOP_EXPR
244 || TREE_CODE (operation) == CONVERT_EXPR
245 || TREE_CODE (operation) == WIDEN_MULT_EXPR)
247 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
248 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
249 TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
250 scalar_type = rhs_type;
253 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "get vectype for scalar type: ");
256 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
259 vectype = get_vectype_for_scalar_type (scalar_type);
262 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
265 "not vectorized: unsupported data-type ");
266 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
270 STMT_VINFO_VECTYPE (stmt_info) = vectype;
273 if (vect_print_dump_info (REPORT_DETAILS))
275 fprintf (vect_dump, "vectype: ");
276 print_generic_expr (vect_dump, vectype, TDF_SLIM);
279 nunits = TYPE_VECTOR_SUBPARTS (vectype);
280 if (vect_print_dump_info (REPORT_DETAILS))
281 fprintf (vect_dump, "nunits = %d", nunits);
283 if (!vectorization_factor
284 || (nunits > vectorization_factor))
285 vectorization_factor = nunits;
290 /* TODO: Analyze cost. Decide if worth while to vectorize. */
291 if (vect_print_dump_info (REPORT_DETAILS))
292 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
293 if (vectorization_factor <= 1)
295 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
296 fprintf (vect_dump, "not vectorized: unsupported data-type");
299 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
305 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
306 the number of created vector stmts depends on the unrolling factor). However,
307 the actual number of vector stmts for every SLP node depends on VF which is
308 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
309 In this function we assume that the inside costs calculated in
310 vect_model_xxx_cost are linear in ncopies. */
313 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
315 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
316 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
317 slp_instance instance;
319 if (vect_print_dump_info (REPORT_SLP))
320 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
322 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
323 /* We assume that costs are linear in ncopies. */
324 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
325 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
329 /* Function vect_analyze_operations.
331 Scan the loop stmts and make sure they are all vectorizable. */
334 vect_analyze_operations (loop_vec_info loop_vinfo)
336 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
337 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
338 int nbbs = loop->num_nodes;
339 block_stmt_iterator si;
340 unsigned int vectorization_factor = 0;
344 stmt_vec_info stmt_info;
345 bool need_to_vectorize = false;
346 int min_profitable_iters;
347 int min_scalar_loop_bound;
349 bool only_slp_in_loop = true;
351 if (vect_print_dump_info (REPORT_DETAILS))
352 fprintf (vect_dump, "=== vect_analyze_operations ===");
354 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
355 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
357 for (i = 0; i < nbbs; i++)
359 basic_block bb = bbs[i];
361 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
365 stmt_info = vinfo_for_stmt (phi);
366 if (vect_print_dump_info (REPORT_DETAILS))
368 fprintf (vect_dump, "examining phi: ");
369 print_generic_expr (vect_dump, phi, TDF_SLIM);
372 if (! is_loop_header_bb_p (bb))
374 /* inner-loop loop-closed exit phi in outer-loop vectorization
375 (i.e. a phi in the tail of the outer-loop).
376 FORNOW: we currently don't support the case that these phis
377 are not used in the outerloop, cause this case requires
378 to actually do something here. */
379 if (!STMT_VINFO_RELEVANT_P (stmt_info)
380 || STMT_VINFO_LIVE_P (stmt_info))
382 if (vect_print_dump_info (REPORT_DETAILS))
384 "Unsupported loop-closed phi in outer-loop.");
390 gcc_assert (stmt_info);
392 if (STMT_VINFO_LIVE_P (stmt_info))
394 /* FORNOW: not yet supported. */
395 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
396 fprintf (vect_dump, "not vectorized: value used after loop.");
400 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
401 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
403 /* A scalar-dependence cycle that we don't support. */
404 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
405 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
409 if (STMT_VINFO_RELEVANT_P (stmt_info))
411 need_to_vectorize = true;
412 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
413 ok = vectorizable_induction (phi, NULL, NULL);
418 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
421 "not vectorized: relevant phi not supported: ");
422 print_generic_expr (vect_dump, phi, TDF_SLIM);
428 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
430 tree stmt = bsi_stmt (si);
431 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
432 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
434 if (vect_print_dump_info (REPORT_DETAILS))
436 fprintf (vect_dump, "==> examining statement: ");
437 print_generic_expr (vect_dump, stmt, TDF_SLIM);
440 gcc_assert (stmt_info);
442 /* skip stmts which do not need to be vectorized.
443 this is expected to include:
444 - the COND_EXPR which is the loop exit condition
445 - any LABEL_EXPRs in the loop
446 - computations that are used only for array indexing or loop
449 if (!STMT_VINFO_RELEVANT_P (stmt_info)
450 && !STMT_VINFO_LIVE_P (stmt_info))
452 if (vect_print_dump_info (REPORT_DETAILS))
453 fprintf (vect_dump, "irrelevant.");
457 switch (STMT_VINFO_DEF_TYPE (stmt_info))
462 case vect_reduction_def:
463 gcc_assert (relevance == vect_used_in_outer
464 || relevance == vect_used_in_outer_by_reduction
465 || relevance == vect_unused_in_loop);
468 case vect_induction_def:
469 case vect_constant_def:
470 case vect_invariant_def:
471 case vect_unknown_def_type:
476 if (STMT_VINFO_RELEVANT_P (stmt_info))
478 gcc_assert (GIMPLE_STMT_P (stmt)
479 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
480 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
481 need_to_vectorize = true;
484 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
485 || vectorizable_type_demotion (stmt, NULL, NULL)
486 || vectorizable_conversion (stmt, NULL, NULL, NULL)
487 || vectorizable_operation (stmt, NULL, NULL, NULL)
488 || vectorizable_assignment (stmt, NULL, NULL, NULL)
489 || vectorizable_load (stmt, NULL, NULL, NULL)
490 || vectorizable_call (stmt, NULL, NULL)
491 || vectorizable_store (stmt, NULL, NULL, NULL)
492 || vectorizable_condition (stmt, NULL, NULL)
493 || vectorizable_reduction (stmt, NULL, NULL));
495 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
496 need extra handling, except for vectorizable reductions. */
497 if (STMT_VINFO_LIVE_P (stmt_info)
498 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
499 ok |= vectorizable_live_operation (stmt, NULL, NULL);
503 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
505 fprintf (vect_dump, "not vectorized: stmt not supported: ");
506 print_generic_expr (vect_dump, stmt, TDF_SLIM);
511 if (!PURE_SLP_STMT (stmt_info))
513 /* STMT needs loop-based vectorization. */
514 only_slp_in_loop = false;
516 /* Groups of strided accesses whose size is not a power of 2 are
517 not vectorizable yet using loop-vectorization. Therefore, if
518 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
519 both SLPed and loop-based vectorzed), the loop cannot be
521 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
522 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
523 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
525 if (vect_print_dump_info (REPORT_DETAILS))
527 fprintf (vect_dump, "not vectorized: the size of group "
528 "of strided accesses is not a power of 2");
529 print_generic_expr (vect_dump, stmt, TDF_SLIM);
537 /* All operations in the loop are either irrelevant (deal with loop
538 control, or dead), or only used outside the loop and can be moved
539 out of the loop (e.g. invariants, inductions). The loop can be
540 optimized away by scalar optimizations. We're better off not
541 touching this loop. */
542 if (!need_to_vectorize)
544 if (vect_print_dump_info (REPORT_DETAILS))
546 "All the computation can be taken out of the loop.");
547 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
549 "not vectorized: redundant loop. no profit to vectorize.");
553 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
554 vectorization factor of the loop is the unrolling factor required by the
555 SLP instances. If that unrolling factor is 1, we say, that we perform
556 pure SLP on loop - cross iteration parallelism is not exploited. */
557 if (only_slp_in_loop)
558 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
560 vectorization_factor = least_common_multiple (vectorization_factor,
561 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
563 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
565 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
566 && vect_print_dump_info (REPORT_DETAILS))
568 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
569 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
571 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
572 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
574 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
575 fprintf (vect_dump, "not vectorized: iteration count too small.");
576 if (vect_print_dump_info (REPORT_DETAILS))
577 fprintf (vect_dump,"not vectorized: iteration count smaller than "
578 "vectorization factor.");
582 /* Analyze cost. Decide if worth while to vectorize. */
584 /* Once VF is set, SLP costs should be updated since the number of created
585 vector stmts depends on VF. */
586 vect_update_slp_costs_according_to_vf (loop_vinfo);
588 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
589 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
590 if (min_profitable_iters < 0)
592 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
593 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
594 if (vect_print_dump_info (REPORT_DETAILS))
595 fprintf (vect_dump, "not vectorized: vector version will never be "
600 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
601 * vectorization_factor) - 1);
603 /* Use the cost model only if it is more conservative than user specified
606 th = (unsigned) min_scalar_loop_bound;
607 if (min_profitable_iters
608 && (!min_scalar_loop_bound
609 || min_profitable_iters > min_scalar_loop_bound))
610 th = (unsigned) min_profitable_iters;
612 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
613 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
615 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
616 fprintf (vect_dump, "not vectorized: vectorization not "
618 if (vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "not vectorized: iteration count smaller than "
620 "user specified loop bound parameter or minimum "
621 "profitable iterations (whichever is more conservative).");
625 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
626 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
627 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
629 if (vect_print_dump_info (REPORT_DETAILS))
630 fprintf (vect_dump, "epilog loop required.");
631 if (!vect_can_advance_ivs_p (loop_vinfo))
633 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
635 "not vectorized: can't create epilog loop 1.");
638 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
640 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
642 "not vectorized: can't create epilog loop 2.");
651 /* Function exist_non_indexing_operands_for_use_p
653 USE is one of the uses attached to STMT. Check if USE is
654 used in STMT for anything other than indexing an array. */
657 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
660 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
662 /* USE corresponds to some operand in STMT. If there is no data
663 reference in STMT, then any operand that corresponds to USE
664 is not indexing an array. */
665 if (!STMT_VINFO_DATA_REF (stmt_info))
668 /* STMT has a data_ref. FORNOW this means that its of one of
672 (This should have been verified in analyze_data_refs).
674 'var' in the second case corresponds to a def, not a use,
675 so USE cannot correspond to any operands that are not used
678 Therefore, all we need to check is if STMT falls into the
679 first case, and whether var corresponds to USE. */
681 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
684 operand = GIMPLE_STMT_OPERAND (stmt, 1);
686 if (TREE_CODE (operand) != SSA_NAME)
696 /* Function vect_analyze_scalar_cycles_1.
698 Examine the cross iteration def-use cycles of scalar variables
699 in LOOP. LOOP_VINFO represents the loop that is noe being
700 considered for vectorization (can be LOOP, or an outer-loop
704 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
707 basic_block bb = loop->header;
709 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
711 if (vect_print_dump_info (REPORT_DETAILS))
712 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
714 /* First - identify all inductions. */
715 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
717 tree access_fn = NULL;
718 tree def = PHI_RESULT (phi);
719 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
721 if (vect_print_dump_info (REPORT_DETAILS))
723 fprintf (vect_dump, "Analyze phi: ");
724 print_generic_expr (vect_dump, phi, TDF_SLIM);
727 /* Skip virtual phi's. The data dependences that are associated with
728 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
729 if (!is_gimple_reg (SSA_NAME_VAR (def)))
732 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
734 /* Analyze the evolution function. */
735 access_fn = analyze_scalar_evolution (loop, def);
736 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
738 fprintf (vect_dump, "Access function of PHI: ");
739 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
743 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
745 VEC_safe_push (tree, heap, worklist, phi);
749 if (vect_print_dump_info (REPORT_DETAILS))
750 fprintf (vect_dump, "Detected induction.");
751 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
755 /* Second - identify all reductions. */
756 while (VEC_length (tree, worklist) > 0)
758 tree phi = VEC_pop (tree, worklist);
759 tree def = PHI_RESULT (phi);
760 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
763 if (vect_print_dump_info (REPORT_DETAILS))
765 fprintf (vect_dump, "Analyze phi: ");
766 print_generic_expr (vect_dump, phi, TDF_SLIM);
769 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
770 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
772 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
775 if (vect_print_dump_info (REPORT_DETAILS))
776 fprintf (vect_dump, "Detected reduction.");
777 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
778 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
782 if (vect_print_dump_info (REPORT_DETAILS))
783 fprintf (vect_dump, "Unknown def-use cycle pattern.");
786 VEC_free (tree, heap, worklist);
791 /* Function vect_analyze_scalar_cycles.
793 Examine the cross iteration def-use cycles of scalar variables, by
794 analyzing the loop-header PHIs of scalar variables; Classify each
795 cycle as one of the following: invariant, induction, reduction, unknown.
796 We do that for the loop represented by LOOP_VINFO, and also to its
797 inner-loop, if exists.
798 Examples for scalar cycles:
813 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
815 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
817 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
819 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
820 Reductions in such inner-loop therefore have different properties than
821 the reductions in the nest that gets vectorized:
822 1. When vectorized, they are executed in the same order as in the original
823 scalar loop, so we can't change the order of computation when
825 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
826 current checks are too strict. */
829 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
833 /* Function vect_insert_into_interleaving_chain.
835 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
838 vect_insert_into_interleaving_chain (struct data_reference *dra,
839 struct data_reference *drb)
841 tree prev, next, next_init;
842 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
843 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
845 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
846 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
849 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
850 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
853 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
854 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
858 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
861 /* We got to the end of the list. Insert here. */
862 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
863 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
867 /* Function vect_update_interleaving_chain.
869 For two data-refs DRA and DRB that are a part of a chain interleaved data
870 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
872 There are four possible cases:
873 1. New stmts - both DRA and DRB are not a part of any chain:
876 2. DRB is a part of a chain and DRA is not:
877 no need to update FIRST_DR
878 no need to insert DRB
879 insert DRA according to init
880 3. DRA is a part of a chain and DRB is not:
881 if (init of FIRST_DR > init of DRB)
883 NEXT(FIRST_DR) = previous FIRST_DR
885 insert DRB according to its init
886 4. both DRA and DRB are in some interleaving chains:
887 choose the chain with the smallest init of FIRST_DR
888 insert the nodes of the second chain into the first one. */
891 vect_update_interleaving_chain (struct data_reference *drb,
892 struct data_reference *dra)
894 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
895 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
896 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
897 tree node, prev, next, node_init, first_stmt;
899 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
900 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
902 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
903 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
904 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
908 /* 2. DRB is a part of a chain and DRA is not. */
909 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
911 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
912 /* Insert DRA into the chain of DRB. */
913 vect_insert_into_interleaving_chain (dra, drb);
917 /* 3. DRA is a part of a chain and DRB is not. */
918 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
920 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
921 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
925 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
927 /* DRB's init is smaller than the init of the stmt previously marked
928 as the first stmt of the interleaving chain of DRA. Therefore, we
929 update FIRST_STMT and put DRB in the head of the list. */
930 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
931 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
933 /* Update all the stmts in the list to point to the new FIRST_STMT. */
934 tmp = old_first_stmt;
937 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
938 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
943 /* Insert DRB in the list of DRA. */
944 vect_insert_into_interleaving_chain (drb, dra);
945 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
950 /* 4. both DRA and DRB are in some interleaving chains. */
951 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
952 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
953 if (first_a == first_b)
955 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
956 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
958 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
960 /* Insert the nodes of DRA chain into the DRB chain.
961 After inserting a node, continue from this node of the DRB chain (don't
962 start from the beginning. */
963 node = DR_GROUP_FIRST_DR (stmtinfo_a);
964 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
965 first_stmt = first_b;
969 /* Insert the nodes of DRB chain into the DRA chain.
970 After inserting a node, continue from this node of the DRA chain (don't
971 start from the beginning. */
972 node = DR_GROUP_FIRST_DR (stmtinfo_b);
973 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
974 first_stmt = first_a;
979 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
980 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
983 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
984 if (tree_int_cst_compare (next_init, node_init) > 0)
987 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
988 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
993 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
997 /* We got to the end of the list. Insert here. */
998 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
999 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
1002 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
1003 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
1008 /* Function vect_equal_offsets.
1010 Check if OFFSET1 and OFFSET2 are identical expressions. */
1013 vect_equal_offsets (tree offset1, tree offset2)
1017 STRIP_NOPS (offset1);
1018 STRIP_NOPS (offset2);
1020 if (offset1 == offset2)
1023 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1024 || !BINARY_CLASS_P (offset1)
1025 || !BINARY_CLASS_P (offset2))
1028 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1029 TREE_OPERAND (offset2, 0));
1030 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1031 TREE_OPERAND (offset2, 1));
1033 return (res0 && res1);
1037 /* Function vect_check_interleaving.
1039 Check if DRA and DRB are a part of interleaving. In case they are, insert
1040 DRA and DRB in an interleaving chain. */
1043 vect_check_interleaving (struct data_reference *dra,
1044 struct data_reference *drb)
1046 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1048 /* Check that the data-refs have same first location (except init) and they
1049 are both either store or load (not load and store). */
1050 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1051 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1052 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1053 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1054 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1055 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1056 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1057 || DR_IS_READ (dra) != DR_IS_READ (drb))
1061 1. data-refs are of the same type
1062 2. their steps are equal
1063 3. the step is greater than the difference between data-refs' inits */
1064 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1065 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
1067 if (type_size_a != type_size_b
1068 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
1071 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1072 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1073 step = TREE_INT_CST_LOW (DR_STEP (dra));
1075 if (init_a > init_b)
1077 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1078 and DRB is accessed before DRA. */
1079 diff_mod_size = (init_a - init_b) % type_size_a;
1081 if ((init_a - init_b) > step)
1084 if (diff_mod_size == 0)
1086 vect_update_interleaving_chain (drb, dra);
1087 if (vect_print_dump_info (REPORT_DR_DETAILS))
1089 fprintf (vect_dump, "Detected interleaving ");
1090 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1091 fprintf (vect_dump, " and ");
1092 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1099 /* If init_b == init_a + the size of the type * k, we have an
1100 interleaving, and DRA is accessed before DRB. */
1101 diff_mod_size = (init_b - init_a) % type_size_a;
1103 if ((init_b - init_a) > step)
1106 if (diff_mod_size == 0)
1108 vect_update_interleaving_chain (dra, drb);
1109 if (vect_print_dump_info (REPORT_DR_DETAILS))
1111 fprintf (vect_dump, "Detected interleaving ");
1112 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1113 fprintf (vect_dump, " and ");
1114 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1122 /* Function vect_analyze_data_ref_dependence.
1124 Return TRUE if there (might) exist a dependence between a memory-reference
1125 DRA and a memory-reference DRB. */
1128 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1129 loop_vec_info loop_vinfo)
1132 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1133 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1134 struct data_reference *dra = DDR_A (ddr);
1135 struct data_reference *drb = DDR_B (ddr);
1136 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1137 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1138 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1139 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1140 lambda_vector dist_v;
1141 unsigned int loop_depth;
1143 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1145 /* Independent data accesses. */
1146 vect_check_interleaving (dra, drb);
1150 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1153 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1155 if (vect_print_dump_info (REPORT_DR_DETAILS))
1158 "versioning for alias required: can't determine dependence between ");
1159 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1160 fprintf (vect_dump, " and ");
1161 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1166 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1168 if (vect_print_dump_info (REPORT_DR_DETAILS))
1170 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1171 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1172 fprintf (vect_dump, " and ");
1173 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1178 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1179 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1181 int dist = dist_v[loop_depth];
1183 if (vect_print_dump_info (REPORT_DR_DETAILS))
1184 fprintf (vect_dump, "dependence distance = %d.", dist);
1186 /* Same loop iteration. */
1187 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1189 /* Two references with distance zero have the same alignment. */
1190 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1191 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1192 if (vect_print_dump_info (REPORT_ALIGNMENT))
1193 fprintf (vect_dump, "accesses have the same alignment.");
1194 if (vect_print_dump_info (REPORT_DR_DETAILS))
1196 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1197 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1198 fprintf (vect_dump, " and ");
1199 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1202 /* For interleaving, mark that there is a read-write dependency if
1203 necessary. We check before that one of the data-refs is store. */
1204 if (DR_IS_READ (dra))
1205 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1208 if (DR_IS_READ (drb))
1209 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1215 if (abs (dist) >= vectorization_factor)
1217 /* Dependence distance does not create dependence, as far as vectorization
1218 is concerned, in this case. */
1219 if (vect_print_dump_info (REPORT_DR_DETAILS))
1220 fprintf (vect_dump, "dependence distance >= VF.");
1224 if (vect_print_dump_info (REPORT_DR_DETAILS))
1227 "versioning for alias required: possible dependence "
1228 "between data-refs ");
1229 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1230 fprintf (vect_dump, " and ");
1231 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1240 /* Return TRUE if DDR_NEW is already found in MAY_ALIAS_DDRS list. */
1243 vect_is_duplicate_ddr (VEC (ddr_p, heap) * may_alias_ddrs, ddr_p ddr_new)
1248 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
1250 tree dref_A_i, dref_B_i, dref_A_j, dref_B_j;
1252 dref_A_i = DR_REF (DDR_A (ddr));
1253 dref_B_i = DR_REF (DDR_B (ddr));
1254 dref_A_j = DR_REF (DDR_A (ddr_new));
1255 dref_B_j = DR_REF (DDR_B (ddr_new));
1257 if ((operand_equal_p (dref_A_i, dref_A_j, 0)
1258 && operand_equal_p (dref_B_i, dref_B_j, 0))
1259 || (operand_equal_p (dref_A_i, dref_B_j, 0)
1260 && operand_equal_p (dref_B_i, dref_A_j, 0)))
1262 if (vect_print_dump_info (REPORT_DR_DETAILS))
1264 fprintf (vect_dump, "found same pair of data references ");
1265 print_generic_expr (vect_dump, dref_A_i, TDF_SLIM);
1266 fprintf (vect_dump, " and ");
1267 print_generic_expr (vect_dump, dref_B_i, TDF_SLIM);
1275 /* Save DDR in LOOP_VINFO list of ddrs that may alias and need to be
1276 tested at run-time. Returns false if number of run-time checks
1277 inserted by vectorizer is greater than maximum defined by
1278 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS. */
1280 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1282 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1284 if (vect_print_dump_info (REPORT_DR_DETAILS))
1286 fprintf (vect_dump, "mark for run-time aliasing test between ");
1287 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1288 fprintf (vect_dump, " and ");
1289 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1292 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1295 if (vect_print_dump_info (REPORT_DR_DETAILS))
1296 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1300 /* Do not add to the list duplicate ddrs. */
1301 if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr))
1304 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
1305 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1307 if (vect_print_dump_info (REPORT_DR_DETAILS))
1310 "disable versioning for alias - max number of generated "
1311 "checks exceeded.");
1314 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1318 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1322 /* Function vect_analyze_data_ref_dependences.
1324 Examine all the data references in the loop, and make sure there do not
1325 exist any data dependences between them. */
1328 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1331 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1332 struct data_dependence_relation *ddr;
1334 if (vect_print_dump_info (REPORT_DETAILS))
1335 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1337 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1338 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1340 /* Add to list of ddrs that need to be tested at run-time. */
1341 if (!vect_mark_for_runtime_alias_test (ddr, loop_vinfo))
1349 /* Function vect_compute_data_ref_alignment
1351 Compute the misalignment of the data reference DR.
1354 1. If during the misalignment computation it is found that the data reference
1355 cannot be vectorized then false is returned.
1356 2. DR_MISALIGNMENT (DR) is defined.
1358 FOR NOW: No analysis is actually performed. Misalignment is calculated
1359 only for trivial cases. TODO. */
1362 vect_compute_data_ref_alignment (struct data_reference *dr)
1364 tree stmt = DR_STMT (dr);
1365 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1366 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1367 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1368 tree ref = DR_REF (dr);
1370 tree base, base_addr;
1373 tree aligned_to, alignment;
1375 if (vect_print_dump_info (REPORT_DETAILS))
1376 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1378 /* Initialize misalignment to unknown. */
1379 SET_DR_MISALIGNMENT (dr, -1);
1381 misalign = DR_INIT (dr);
1382 aligned_to = DR_ALIGNED_TO (dr);
1383 base_addr = DR_BASE_ADDRESS (dr);
1385 /* In case the dataref is in an inner-loop of the loop that is being
1386 vectorized (LOOP), we use the base and misalignment information
1387 relative to the outer-loop (LOOP). This is ok only if the misalignment
1388 stays the same throughout the execution of the inner-loop, which is why
1389 we have to check that the stride of the dataref in the inner-loop evenly
1390 divides by the vector size. */
1391 if (nested_in_vect_loop_p (loop, stmt))
1393 tree step = DR_STEP (dr);
1394 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1396 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1398 if (vect_print_dump_info (REPORT_ALIGNMENT))
1399 fprintf (vect_dump, "inner step divides the vector-size.");
1400 misalign = STMT_VINFO_DR_INIT (stmt_info);
1401 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1402 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
1406 if (vect_print_dump_info (REPORT_ALIGNMENT))
1407 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1408 misalign = NULL_TREE;
1412 base = build_fold_indirect_ref (base_addr);
1413 vectype = STMT_VINFO_VECTYPE (stmt_info);
1414 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1416 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1419 if (vect_print_dump_info (REPORT_ALIGNMENT))
1421 fprintf (vect_dump, "Unknown alignment for access: ");
1422 print_generic_expr (vect_dump, base, TDF_SLIM);
1428 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1430 || (TREE_CODE (base_addr) == SSA_NAME
1431 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1432 TREE_TYPE (base_addr)))),
1434 base_aligned = true;
1436 base_aligned = false;
1440 /* Do not change the alignment of global variables if
1441 flag_section_anchors is enabled. */
1442 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1443 || (TREE_STATIC (base) && flag_section_anchors))
1445 if (vect_print_dump_info (REPORT_DETAILS))
1447 fprintf (vect_dump, "can't force alignment of ref: ");
1448 print_generic_expr (vect_dump, ref, TDF_SLIM);
1453 /* Force the alignment of the decl.
1454 NOTE: This is the only change to the code we make during
1455 the analysis phase, before deciding to vectorize the loop. */
1456 if (vect_print_dump_info (REPORT_DETAILS))
1457 fprintf (vect_dump, "force alignment");
1458 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1459 DECL_USER_ALIGN (base) = 1;
1462 /* At this point we assume that the base is aligned. */
1463 gcc_assert (base_aligned
1464 || (TREE_CODE (base) == VAR_DECL
1465 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1467 /* Modulo alignment. */
1468 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1470 if (!host_integerp (misalign, 1))
1472 /* Negative or overflowed misalignment value. */
1473 if (vect_print_dump_info (REPORT_DETAILS))
1474 fprintf (vect_dump, "unexpected misalign value");
1478 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1480 if (vect_print_dump_info (REPORT_DETAILS))
1482 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1483 print_generic_expr (vect_dump, ref, TDF_SLIM);
1490 /* Function vect_compute_data_refs_alignment
1492 Compute the misalignment of data references in the loop.
1493 Return FALSE if a data reference is found that cannot be vectorized. */
1496 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1498 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1499 struct data_reference *dr;
1502 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1503 if (!vect_compute_data_ref_alignment (dr))
1510 /* Function vect_update_misalignment_for_peel
1512 DR - the data reference whose misalignment is to be adjusted.
1513 DR_PEEL - the data reference whose misalignment is being made
1514 zero in the vector loop by the peel.
1515 NPEEL - the number of iterations in the peel loop if the misalignment
1516 of DR_PEEL is known at compile time. */
1519 vect_update_misalignment_for_peel (struct data_reference *dr,
1520 struct data_reference *dr_peel, int npeel)
1523 VEC(dr_p,heap) *same_align_drs;
1524 struct data_reference *current_dr;
1525 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1526 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1527 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1528 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1530 /* For interleaved data accesses the step in the loop must be multiplied by
1531 the size of the interleaving group. */
1532 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1533 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1534 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1535 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1537 /* It can be assumed that the data refs with the same alignment as dr_peel
1538 are aligned in the vector loop. */
1540 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1541 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1543 if (current_dr != dr)
1545 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1546 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1547 SET_DR_MISALIGNMENT (dr, 0);
1551 if (known_alignment_for_access_p (dr)
1552 && known_alignment_for_access_p (dr_peel))
1554 int misal = DR_MISALIGNMENT (dr);
1555 misal += npeel * dr_size;
1556 misal %= UNITS_PER_SIMD_WORD;
1557 SET_DR_MISALIGNMENT (dr, misal);
1561 if (vect_print_dump_info (REPORT_DETAILS))
1562 fprintf (vect_dump, "Setting misalignment to -1.");
1563 SET_DR_MISALIGNMENT (dr, -1);
1567 /* Function vect_verify_datarefs_alignment
1569 Return TRUE if all data references in the loop can be
1570 handled with respect to alignment. */
1573 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1575 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1576 struct data_reference *dr;
1577 enum dr_alignment_support supportable_dr_alignment;
1580 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1582 tree stmt = DR_STMT (dr);
1583 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1585 /* For interleaving, only the alignment of the first access matters. */
1586 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1587 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1590 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1591 if (!supportable_dr_alignment)
1593 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1595 if (DR_IS_READ (dr))
1597 "not vectorized: unsupported unaligned load.");
1600 "not vectorized: unsupported unaligned store.");
1604 if (supportable_dr_alignment != dr_aligned
1605 && vect_print_dump_info (REPORT_ALIGNMENT))
1606 fprintf (vect_dump, "Vectorizing an unaligned access.");
1612 /* Function vector_alignment_reachable_p
1614 Return true if vector alignment for DR is reachable by peeling
1615 a few loop iterations. Return false otherwise. */
1618 vector_alignment_reachable_p (struct data_reference *dr)
1620 tree stmt = DR_STMT (dr);
1621 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1622 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1624 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1626 /* For interleaved access we peel only if number of iterations in
1627 the prolog loop ({VF - misalignment}), is a multiple of the
1628 number of the interleaved accesses. */
1629 int elem_size, mis_in_elements;
1630 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1632 /* FORNOW: handle only known alignment. */
1633 if (!known_alignment_for_access_p (dr))
1636 elem_size = UNITS_PER_SIMD_WORD / nelements;
1637 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1639 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1643 /* If misalignment is known at the compile time then allow peeling
1644 only if natural alignment is reachable through peeling. */
1645 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1647 HOST_WIDE_INT elmsize =
1648 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1649 if (vect_print_dump_info (REPORT_DETAILS))
1651 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1652 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1654 if (DR_MISALIGNMENT (dr) % elmsize)
1656 if (vect_print_dump_info (REPORT_DETAILS))
1657 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1662 if (!known_alignment_for_access_p (dr))
1664 tree type = (TREE_TYPE (DR_REF (dr)));
1665 tree ba = DR_BASE_OBJECT (dr);
1666 bool is_packed = false;
1669 is_packed = contains_packed_reference (ba);
1671 if (vect_print_dump_info (REPORT_DETAILS))
1672 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1673 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1682 /* Function vect_enhance_data_refs_alignment
1684 This pass will use loop versioning and loop peeling in order to enhance
1685 the alignment of data references in the loop.
1687 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1688 original loop is to be vectorized; Any other loops that are created by
1689 the transformations performed in this pass - are not supposed to be
1690 vectorized. This restriction will be relaxed.
1692 This pass will require a cost model to guide it whether to apply peeling
1693 or versioning or a combination of the two. For example, the scheme that
1694 intel uses when given a loop with several memory accesses, is as follows:
1695 choose one memory access ('p') which alignment you want to force by doing
1696 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1697 other accesses are not necessarily aligned, or (2) use loop versioning to
1698 generate one loop in which all accesses are aligned, and another loop in
1699 which only 'p' is necessarily aligned.
1701 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1702 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1703 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1705 Devising a cost model is the most critical aspect of this work. It will
1706 guide us on which access to peel for, whether to use loop versioning, how
1707 many versions to create, etc. The cost model will probably consist of
1708 generic considerations as well as target specific considerations (on
1709 powerpc for example, misaligned stores are more painful than misaligned
1712 Here are the general steps involved in alignment enhancements:
1714 -- original loop, before alignment analysis:
1715 for (i=0; i<N; i++){
1716 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1717 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1720 -- After vect_compute_data_refs_alignment:
1721 for (i=0; i<N; i++){
1722 x = q[i]; # DR_MISALIGNMENT(q) = 3
1723 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1726 -- Possibility 1: we do loop versioning:
1728 for (i=0; i<N; i++){ # loop 1A
1729 x = q[i]; # DR_MISALIGNMENT(q) = 3
1730 p[i] = y; # DR_MISALIGNMENT(p) = 0
1734 for (i=0; i<N; i++){ # loop 1B
1735 x = q[i]; # DR_MISALIGNMENT(q) = 3
1736 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1740 -- Possibility 2: we do loop peeling:
1741 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1745 for (i = 3; i < N; i++){ # loop 2A
1746 x = q[i]; # DR_MISALIGNMENT(q) = 0
1747 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1750 -- Possibility 3: combination of loop peeling and versioning:
1751 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1756 for (i = 3; i<N; i++){ # loop 3A
1757 x = q[i]; # DR_MISALIGNMENT(q) = 0
1758 p[i] = y; # DR_MISALIGNMENT(p) = 0
1762 for (i = 3; i<N; i++){ # loop 3B
1763 x = q[i]; # DR_MISALIGNMENT(q) = 0
1764 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1768 These loops are later passed to loop_transform to be vectorized. The
1769 vectorizer will use the alignment information to guide the transformation
1770 (whether to generate regular loads/stores, or with special handling for
1774 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1776 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1777 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1778 enum dr_alignment_support supportable_dr_alignment;
1779 struct data_reference *dr0 = NULL;
1780 struct data_reference *dr;
1782 bool do_peeling = false;
1783 bool do_versioning = false;
1786 stmt_vec_info stmt_info;
1787 int vect_versioning_for_alias_required;
1789 if (vect_print_dump_info (REPORT_DETAILS))
1790 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1792 /* While cost model enhancements are expected in the future, the high level
1793 view of the code at this time is as follows:
1795 A) If there is a misaligned write then see if peeling to align this write
1796 can make all data references satisfy vect_supportable_dr_alignment.
1797 If so, update data structures as needed and return true. Note that
1798 at this time vect_supportable_dr_alignment is known to return false
1799 for a misaligned write.
1801 B) If peeling wasn't possible and there is a data reference with an
1802 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1803 then see if loop versioning checks can be used to make all data
1804 references satisfy vect_supportable_dr_alignment. If so, update
1805 data structures as needed and return true.
1807 C) If neither peeling nor versioning were successful then return false if
1808 any data reference does not satisfy vect_supportable_dr_alignment.
1810 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1812 Note, Possibility 3 above (which is peeling and versioning together) is not
1813 being done at this time. */
1815 /* (1) Peeling to force alignment. */
1817 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1819 + How many accesses will become aligned due to the peeling
1820 - How many accesses will become unaligned due to the peeling,
1821 and the cost of misaligned accesses.
1822 - The cost of peeling (the extra runtime checks, the increase
1825 The scheme we use FORNOW: peel to force the alignment of the first
1826 misaligned store in the loop.
1827 Rationale: misaligned stores are not yet supported.
1829 TODO: Use a cost model. */
1831 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1833 stmt = DR_STMT (dr);
1834 stmt_info = vinfo_for_stmt (stmt);
1836 /* For interleaving, only the alignment of the first access
1838 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1839 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1842 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1844 do_peeling = vector_alignment_reachable_p (dr);
1847 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1848 fprintf (vect_dump, "vector alignment may not be reachable");
1853 vect_versioning_for_alias_required =
1854 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1856 /* Temporarily, if versioning for alias is required, we disable peeling
1857 until we support peeling and versioning. Often peeling for alignment
1858 will require peeling for loop-bound, which in turn requires that we
1859 know how to adjust the loop ivs after the loop. */
1860 if (vect_versioning_for_alias_required
1861 || !vect_can_advance_ivs_p (loop_vinfo)
1862 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1869 tree stmt = DR_STMT (dr0);
1870 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1871 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1872 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1874 if (known_alignment_for_access_p (dr0))
1876 /* Since it's known at compile time, compute the number of iterations
1877 in the peeled loop (the peeling factor) for use in updating
1878 DR_MISALIGNMENT values. The peeling factor is the vectorization
1879 factor minus the misalignment as an element count. */
1880 mis = DR_MISALIGNMENT (dr0);
1881 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1882 npeel = nelements - mis;
1884 /* For interleaved data access every iteration accesses all the
1885 members of the group, therefore we divide the number of iterations
1886 by the group size. */
1887 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1888 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1889 npeel /= DR_GROUP_SIZE (stmt_info);
1891 if (vect_print_dump_info (REPORT_DETAILS))
1892 fprintf (vect_dump, "Try peeling by %d", npeel);
1895 /* Ensure that all data refs can be vectorized after the peel. */
1896 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1898 int save_misalignment;
1903 stmt = DR_STMT (dr);
1904 stmt_info = vinfo_for_stmt (stmt);
1905 /* For interleaving, only the alignment of the first access
1907 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1908 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1911 save_misalignment = DR_MISALIGNMENT (dr);
1912 vect_update_misalignment_for_peel (dr, dr0, npeel);
1913 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1914 SET_DR_MISALIGNMENT (dr, save_misalignment);
1916 if (!supportable_dr_alignment)
1925 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1926 If the misalignment of DR_i is identical to that of dr0 then set
1927 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1928 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1929 by the peeling factor times the element size of DR_i (MOD the
1930 vectorization factor times the size). Otherwise, the
1931 misalignment of DR_i must be set to unknown. */
1932 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1934 vect_update_misalignment_for_peel (dr, dr0, npeel);
1936 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1937 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1938 SET_DR_MISALIGNMENT (dr0, 0);
1939 if (vect_print_dump_info (REPORT_ALIGNMENT))
1940 fprintf (vect_dump, "Alignment of access forced using peeling.");
1942 if (vect_print_dump_info (REPORT_DETAILS))
1943 fprintf (vect_dump, "Peeling for alignment will be applied.");
1945 stat = vect_verify_datarefs_alignment (loop_vinfo);
1952 /* (2) Versioning to force alignment. */
1954 /* Try versioning if:
1955 1) flag_tree_vect_loop_version is TRUE
1956 2) optimize_size is FALSE
1957 3) there is at least one unsupported misaligned data ref with an unknown
1959 4) all misaligned data refs with a known misalignment are supported, and
1960 5) the number of runtime alignment checks is within reason. */
1963 flag_tree_vect_loop_version
1965 && (!loop->inner); /* FORNOW */
1969 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1971 stmt = DR_STMT (dr);
1972 stmt_info = vinfo_for_stmt (stmt);
1974 /* For interleaving, only the alignment of the first access
1976 if (aligned_access_p (dr)
1977 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1978 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1981 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1983 if (!supportable_dr_alignment)
1989 if (known_alignment_for_access_p (dr)
1990 || VEC_length (tree,
1991 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1992 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1994 do_versioning = false;
1998 stmt = DR_STMT (dr);
1999 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2000 gcc_assert (vectype);
2002 /* The rightmost bits of an aligned address must be zeros.
2003 Construct the mask needed for this test. For example,
2004 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2005 mask must be 15 = 0xf. */
2006 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2008 /* FORNOW: use the same mask to test all potentially unaligned
2009 references in the loop. The vectorizer currently supports
2010 a single vector size, see the reference to
2011 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2012 vectorization factor is computed. */
2013 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2014 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2015 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2016 VEC_safe_push (tree, heap,
2017 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2022 /* Versioning requires at least one misaligned data reference. */
2023 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2024 do_versioning = false;
2025 else if (!do_versioning)
2026 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2031 VEC(tree,heap) *may_misalign_stmts
2032 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2035 /* It can now be assumed that the data references in the statements
2036 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2037 of the loop being vectorized. */
2038 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2041 dr = STMT_VINFO_DATA_REF (stmt_info);
2042 SET_DR_MISALIGNMENT (dr, 0);
2043 if (vect_print_dump_info (REPORT_ALIGNMENT))
2044 fprintf (vect_dump, "Alignment of access forced using versioning.");
2047 if (vect_print_dump_info (REPORT_DETAILS))
2048 fprintf (vect_dump, "Versioning for alignment will be applied.");
2050 /* Peeling and versioning can't be done together at this time. */
2051 gcc_assert (! (do_peeling && do_versioning));
2053 stat = vect_verify_datarefs_alignment (loop_vinfo);
2058 /* This point is reached if neither peeling nor versioning is being done. */
2059 gcc_assert (! (do_peeling || do_versioning));
2061 stat = vect_verify_datarefs_alignment (loop_vinfo);
2066 /* Function vect_analyze_data_refs_alignment
2068 Analyze the alignment of the data-references in the loop.
2069 Return FALSE if a data reference is found that cannot be vectorized. */
2072 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2074 if (vect_print_dump_info (REPORT_DETAILS))
2075 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2077 if (!vect_compute_data_refs_alignment (loop_vinfo))
2079 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2081 "not vectorized: can't calculate alignment for data ref.");
2089 /* Analyze groups of strided accesses: check that DR belongs to a group of
2090 strided accesses of legal size, step, etc. Detect gaps, single element
2091 interleaving, and other special cases. Set strided access info.
2092 Collect groups of strided stores for further use in SLP analysis. */
2095 vect_analyze_group_access (struct data_reference *dr)
2097 tree step = DR_STEP (dr);
2098 tree scalar_type = TREE_TYPE (DR_REF (dr));
2099 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2100 tree stmt = DR_STMT (dr);
2101 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2102 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2103 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2104 HOST_WIDE_INT stride;
2105 bool slp_impossible = false;
2107 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2108 interleaving group (including gaps). */
2109 stride = dr_step / type_size;
2111 /* Not consecutive access is possible only if it is a part of interleaving. */
2112 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2114 /* Check if it this DR is a part of interleaving, and is a single
2115 element of the group that is accessed in the loop. */
2117 /* Gaps are supported only for loads. STEP must be a multiple of the type
2118 size. The size of the group must be a power of 2. */
2120 && (dr_step % type_size) == 0
2122 && exact_log2 (stride) != -1)
2124 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2125 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2126 if (vect_print_dump_info (REPORT_DR_DETAILS))
2128 fprintf (vect_dump, "Detected single element interleaving %d ",
2129 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2130 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2131 fprintf (vect_dump, " step ");
2132 print_generic_expr (vect_dump, step, TDF_SLIM);
2136 if (vect_print_dump_info (REPORT_DETAILS))
2137 fprintf (vect_dump, "not consecutive access");
2141 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2143 /* First stmt in the interleaving chain. Check the chain. */
2144 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2145 struct data_reference *data_ref = dr;
2146 unsigned int count = 1;
2148 tree prev_init = DR_INIT (data_ref);
2150 HOST_WIDE_INT diff, count_in_bytes;
2154 /* Skip same data-refs. In case that two or more stmts share data-ref
2155 (supported only for loads), we vectorize only the first stmt, and
2156 the rest get their vectorized loads from the first one. */
2157 if (!tree_int_cst_compare (DR_INIT (data_ref),
2158 DR_INIT (STMT_VINFO_DATA_REF (
2159 vinfo_for_stmt (next)))))
2161 if (!DR_IS_READ (data_ref))
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump, "Two store stmts share the same dr.");
2168 /* Check that there is no load-store dependencies for this loads
2169 to prevent a case of load-store-load to the same location. */
2170 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2171 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2173 if (vect_print_dump_info (REPORT_DETAILS))
2175 "READ_WRITE dependence in interleaving.");
2179 /* For load use the same data-ref load. */
2180 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2183 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2188 /* Check that all the accesses have the same STEP. */
2189 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2190 if (tree_int_cst_compare (step, next_step))
2192 if (vect_print_dump_info (REPORT_DETAILS))
2193 fprintf (vect_dump, "not consecutive access in interleaving");
2197 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2198 /* Check that the distance between two accesses is equal to the type
2199 size. Otherwise, we have gaps. */
2200 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2201 - TREE_INT_CST_LOW (prev_init)) / type_size;
2204 /* FORNOW: SLP of accesses with gaps is not supported. */
2205 slp_impossible = true;
2206 if (!DR_IS_READ (data_ref))
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "interleaved store with gaps");
2214 /* Store the gap from the previous member of the group. If there is no
2215 gap in the access, DR_GROUP_GAP is always 1. */
2216 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2218 prev_init = DR_INIT (data_ref);
2219 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2220 /* Count the number of data-refs in the chain. */
2224 /* COUNT is the number of accesses found, we multiply it by the size of
2225 the type to get COUNT_IN_BYTES. */
2226 count_in_bytes = type_size * count;
2228 /* Check that the size of the interleaving is not greater than STEP. */
2229 if (dr_step < count_in_bytes)
2231 if (vect_print_dump_info (REPORT_DETAILS))
2233 fprintf (vect_dump, "interleaving size is greater than step for ");
2234 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2239 /* Check that the size of the interleaving is equal to STEP for stores,
2240 i.e., that there are no gaps. */
2241 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2243 if (vect_print_dump_info (REPORT_DETAILS))
2244 fprintf (vect_dump, "interleaved store with gaps");
2248 /* Check that STEP is a multiple of type size. */
2249 if ((dr_step % type_size) != 0)
2251 if (vect_print_dump_info (REPORT_DETAILS))
2253 fprintf (vect_dump, "step is not a multiple of type size: step ");
2254 print_generic_expr (vect_dump, step, TDF_SLIM);
2255 fprintf (vect_dump, " size ");
2256 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2262 /* FORNOW: we handle only interleaving that is a power of 2.
2263 We don't fail here if it may be still possible to vectorize the
2264 group using SLP. If not, the size of the group will be checked in
2265 vect_analyze_operations, and the vectorization will fail. */
2266 if (exact_log2 (stride) == -1)
2268 if (vect_print_dump_info (REPORT_DETAILS))
2269 fprintf (vect_dump, "interleaving is not a power of 2");
2274 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2275 if (vect_print_dump_info (REPORT_DETAILS))
2276 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2278 /* SLP: create an SLP data structure for every interleaving group of
2279 stores for further analysis in vect_analyse_slp. */
2280 if (!DR_IS_READ (dr) && !slp_impossible)
2281 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2288 /* Analyze the access pattern of the data-reference DR.
2289 In case of non-consecutive accesse call vect_analyze_group_access() to
2290 analyze groups of strided accesses. */
2293 vect_analyze_data_ref_access (struct data_reference *dr)
2295 tree step = DR_STEP (dr);
2296 tree scalar_type = TREE_TYPE (DR_REF (dr));
2297 tree stmt = DR_STMT (dr);
2298 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2299 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2300 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2301 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2305 if (vect_print_dump_info (REPORT_DETAILS))
2306 fprintf (vect_dump, "bad data-ref access");
2310 /* Don't allow invariant accesses. */
2314 if (nested_in_vect_loop_p (loop, stmt))
2316 /* For the rest of the analysis we use the outer-loop step. */
2317 step = STMT_VINFO_DR_STEP (stmt_info);
2318 dr_step = TREE_INT_CST_LOW (step);
2322 if (vect_print_dump_info (REPORT_ALIGNMENT))
2323 fprintf (vect_dump, "zero step in outer loop.");
2324 if (DR_IS_READ (dr))
2332 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2334 /* Mark that it is not interleaving. */
2335 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2339 if (nested_in_vect_loop_p (loop, stmt))
2341 if (vect_print_dump_info (REPORT_ALIGNMENT))
2342 fprintf (vect_dump, "strided access in outer loop.");
2346 /* Not consecutive access - check if it's a part of interleaving group. */
2347 return vect_analyze_group_access (dr);
2351 /* Function vect_analyze_data_ref_accesses.
2353 Analyze the access pattern of all the data references in the loop.
2355 FORNOW: the only access pattern that is considered vectorizable is a
2356 simple step 1 (consecutive) access.
2358 FORNOW: handle only arrays and pointer accesses. */
2361 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2364 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2365 struct data_reference *dr;
2367 if (vect_print_dump_info (REPORT_DETAILS))
2368 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2370 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2371 if (!vect_analyze_data_ref_access (dr))
2373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2374 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2382 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2385 vect_free_slp_tree (slp_tree node)
2390 if (SLP_TREE_LEFT (node))
2391 vect_free_slp_tree (SLP_TREE_LEFT (node));
2393 if (SLP_TREE_RIGHT (node))
2394 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2396 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2398 if (SLP_TREE_VEC_STMTS (node))
2399 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2405 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2406 of a legal type and that they match the defs of the first stmt of the SLP
2407 group (stored in FIRST_STMT_...). */
2410 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2411 tree rhs, VEC (tree, heap) **def_stmts0,
2412 VEC (tree, heap) **def_stmts1,
2413 enum vect_def_type *first_stmt_dt0,
2414 enum vect_def_type *first_stmt_dt1,
2415 tree *first_stmt_def0_type,
2416 tree *first_stmt_def1_type,
2417 tree *first_stmt_const_oprnd,
2418 int ncopies_for_cost)
2421 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2422 unsigned int i, number_of_oprnds = op_type;
2424 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2425 stmt_vec_info stmt_info =
2426 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2430 number_of_oprnds = 1;
2432 gcc_assert (op_type == unary_op || op_type == binary_op);
2434 for (i = 0; i < number_of_oprnds; i++)
2437 oprnd = TREE_OPERAND (rhs, i);
2441 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2442 || (!def_stmt && dt[i] != vect_constant_def))
2444 if (vect_print_dump_info (REPORT_SLP))
2446 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2447 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2453 if (!*first_stmt_dt0)
2455 /* op0 of the first stmt of the group - store its info. */
2456 *first_stmt_dt0 = dt[i];
2458 *first_stmt_def0_type = TREE_TYPE (def);
2460 *first_stmt_const_oprnd = oprnd;
2462 /* Analyze costs (for the first stmt of the group only). */
2464 /* Not memory operation (we don't call this functions for loads). */
2465 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2468 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2473 if (!*first_stmt_dt1 && i == 1)
2475 /* op1 of the first stmt of the group - store its info. */
2476 *first_stmt_dt1 = dt[i];
2478 *first_stmt_def1_type = TREE_TYPE (def);
2481 /* We assume that the stmt contains only one constant
2482 operand. We fail otherwise, to be on the safe side. */
2483 if (*first_stmt_const_oprnd)
2485 if (vect_print_dump_info (REPORT_SLP))
2486 fprintf (vect_dump, "Build SLP failed: two constant "
2490 *first_stmt_const_oprnd = oprnd;
2495 /* Not first stmt of the group, check that the def-stmt/s match
2496 the def-stmt/s of the first stmt. */
2498 && (*first_stmt_dt0 != dt[i]
2499 || (*first_stmt_def0_type && def
2500 && *first_stmt_def0_type != TREE_TYPE (def))))
2502 && (*first_stmt_dt1 != dt[i]
2503 || (*first_stmt_def1_type && def
2504 && *first_stmt_def1_type != TREE_TYPE (def))))
2506 && TREE_TYPE (*first_stmt_const_oprnd)
2507 != TREE_TYPE (oprnd)))
2509 if (vect_print_dump_info (REPORT_SLP))
2510 fprintf (vect_dump, "Build SLP failed: different types ");
2517 /* Check the types of the definitions. */
2520 case vect_constant_def:
2521 case vect_invariant_def:
2526 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2528 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2532 /* FORNOW: Not supported. */
2533 if (vect_print_dump_info (REPORT_SLP))
2535 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2536 print_generic_expr (vect_dump, def, TDF_SLIM);
2547 /* Recursively build an SLP tree starting from NODE.
2548 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2549 permutation or are of unsupported types of operation. Otherwise, return
2551 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2552 in the case of multiple types for now. */
2555 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2556 unsigned int group_size, bool *slp_impossible,
2557 int *inside_cost, int *outside_cost,
2558 int ncopies_for_cost)
2560 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2561 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2563 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2564 tree stmt = VEC_index (tree, stmts, 0);
2565 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2566 enum tree_code first_stmt_code = 0;
2567 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2568 tree lhs, rhs, prev_stmt = NULL_TREE;
2569 bool stop_recursion = false, need_same_oprnds = false;
2570 tree vectype, scalar_type, first_op1 = NULL_TREE;
2571 unsigned int vectorization_factor = 0, ncopies;
2574 enum machine_mode optab_op2_mode;
2575 enum machine_mode vec_mode;
2576 tree first_stmt_const_oprnd = NULL_TREE;
2577 struct data_reference *first_dr;
2579 /* For every stmt in NODE find its def stmt/s. */
2580 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2582 if (vect_print_dump_info (REPORT_SLP))
2584 fprintf (vect_dump, "Build SLP for ");
2585 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2588 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2590 if (vect_print_dump_info (REPORT_SLP))
2592 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2593 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2599 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2600 vectype = get_vectype_for_scalar_type (scalar_type);
2601 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2602 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2603 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2607 if (vect_print_dump_info (REPORT_SLP))
2608 fprintf (vect_dump, "SLP failed - multiple types ");
2610 *slp_impossible = true;
2614 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2615 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2617 /* Check the operation. */
2620 first_stmt_code = TREE_CODE (rhs);
2622 /* Shift arguments should be equal in all the packed stmts for a
2623 vector shift with scalar shift operand. */
2624 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2626 vec_mode = TYPE_MODE (vectype);
2627 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2630 if (vect_print_dump_info (REPORT_SLP))
2631 fprintf (vect_dump, "Build SLP failed: no optab.");
2634 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2635 optab_op2_mode = insn_data[icode].operand[2].mode;
2636 if (!VECTOR_MODE_P (optab_op2_mode))
2638 need_same_oprnds = true;
2639 first_op1 = TREE_OPERAND (rhs, 1);
2645 if (first_stmt_code != TREE_CODE (rhs))
2647 if (vect_print_dump_info (REPORT_SLP))
2650 "Build SLP failed: different operation in stmt ");
2651 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2657 if (need_same_oprnds
2658 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2660 if (vect_print_dump_info (REPORT_SLP))
2663 "Build SLP failed: different shift arguments in ");
2664 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2671 /* Strided store or load. */
2672 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2674 if (REFERENCE_CLASS_P (lhs))
2677 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2678 &def_stmts0, &def_stmts1,
2681 &first_stmt_def0_type,
2682 &first_stmt_def1_type,
2683 &first_stmt_const_oprnd,
2692 /* First stmt of the SLP group should be the first load of
2693 the interleaving loop if data permutation is not
2695 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2697 /* FORNOW: data permutations are not supported. */
2698 if (vect_print_dump_info (REPORT_SLP))
2700 fprintf (vect_dump, "Build SLP failed: strided "
2701 " loads need permutation ");
2702 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2708 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2709 if (vect_supportable_dr_alignment (first_dr)
2710 == dr_unaligned_unsupported)
2712 if (vect_print_dump_info (REPORT_SLP))
2714 fprintf (vect_dump, "Build SLP failed: unsupported "
2715 " unaligned load ");
2716 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2722 /* Analyze costs (for the first stmt in the group). */
2723 vect_model_load_cost (vinfo_for_stmt (stmt),
2724 ncopies_for_cost, *node);
2728 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2730 /* FORNOW: data permutations are not supported. */
2731 if (vect_print_dump_info (REPORT_SLP))
2733 fprintf (vect_dump, "Build SLP failed: strided "
2734 " loads need permutation ");
2735 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2743 /* We stop the tree when we reach a group of loads. */
2744 stop_recursion = true;
2747 } /* Strided access. */
2750 if (REFERENCE_CLASS_P (rhs))
2752 /* Not strided load. */
2753 if (vect_print_dump_info (REPORT_SLP))
2755 fprintf (vect_dump, "Build SLP failed: not strided load ");
2756 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2759 /* FORNOW: Not strided loads are not supported. */
2763 /* Not memory operation. */
2764 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2766 if (vect_print_dump_info (REPORT_SLP))
2768 fprintf (vect_dump, "Build SLP failed: operation");
2769 fprintf (vect_dump, " unsupported ");
2770 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2776 /* Find the def-stmts. */
2777 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2778 &def_stmts1, &first_stmt_dt0,
2780 &first_stmt_def0_type,
2781 &first_stmt_def1_type,
2782 &first_stmt_const_oprnd,
2788 /* Add the costs of the node to the overall instance costs. */
2789 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2790 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2792 /* Strided loads were reached - stop the recursion. */
2796 /* Create SLP_TREE nodes for the definition node/s. */
2797 if (first_stmt_dt0 == vect_loop_def)
2799 slp_tree left_node = XNEW (struct _slp_tree);
2800 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2801 SLP_TREE_VEC_STMTS (left_node) = NULL;
2802 SLP_TREE_LEFT (left_node) = NULL;
2803 SLP_TREE_RIGHT (left_node) = NULL;
2804 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2805 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2806 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2807 slp_impossible, inside_cost, outside_cost,
2811 SLP_TREE_LEFT (*node) = left_node;
2814 if (first_stmt_dt1 == vect_loop_def)
2816 slp_tree right_node = XNEW (struct _slp_tree);
2817 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2818 SLP_TREE_VEC_STMTS (right_node) = NULL;
2819 SLP_TREE_LEFT (right_node) = NULL;
2820 SLP_TREE_RIGHT (right_node) = NULL;
2821 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2822 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2823 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2824 slp_impossible, inside_cost, outside_cost,
2828 SLP_TREE_RIGHT (*node) = right_node;
2836 vect_print_slp_tree (slp_tree node)
2844 fprintf (vect_dump, "node ");
2845 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2847 fprintf (vect_dump, "\n\tstmt %d ", i);
2848 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2850 fprintf (vect_dump, "\n");
2852 vect_print_slp_tree (SLP_TREE_LEFT (node));
2853 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2857 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2858 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2859 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2860 stmts in NODE are to be marked. */
2863 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2871 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2872 if (j < 0 || i == j)
2873 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2875 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2876 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2880 /* Analyze an SLP instance starting from a group of strided stores. Call
2881 vect_build_slp_tree to build a tree of packed stmts if possible.
2882 Return FALSE if it's impossible to SLP any stmt in the loop. */
2885 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2887 slp_instance new_instance;
2888 slp_tree node = XNEW (struct _slp_tree);
2889 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2890 unsigned int unrolling_factor = 1, nunits;
2891 tree vectype, scalar_type, next;
2892 unsigned int vectorization_factor = 0, ncopies;
2893 bool slp_impossible = false;
2894 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2896 /* FORNOW: multiple types are not supported. */
2897 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2898 vectype = get_vectype_for_scalar_type (scalar_type);
2899 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2900 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2901 ncopies = vectorization_factor / nunits;
2904 if (vect_print_dump_info (REPORT_SLP))
2905 fprintf (vect_dump, "SLP failed - multiple types ");
2910 /* Create a node (a root of the SLP tree) for the packed strided stores. */
2911 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
2913 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2916 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
2917 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2920 SLP_TREE_VEC_STMTS (node) = NULL;
2921 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
2922 SLP_TREE_LEFT (node) = NULL;
2923 SLP_TREE_RIGHT (node) = NULL;
2924 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
2925 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
2927 /* Calculate the unrolling factor. */
2928 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
2930 /* Calculate the number of vector stmts to create based on the unrolling
2931 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
2932 GROUP_SIZE / NUNITS otherwise. */
2933 ncopies_for_cost = unrolling_factor * group_size / nunits;
2935 /* Build the tree for the SLP instance. */
2936 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
2937 &inside_cost, &outside_cost, ncopies_for_cost))
2939 /* Create a new SLP instance. */
2940 new_instance = XNEW (struct _slp_instance);
2941 SLP_INSTANCE_TREE (new_instance) = node;
2942 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2943 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2944 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
2945 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
2946 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
2948 if (vect_print_dump_info (REPORT_SLP))
2949 vect_print_slp_tree (node);
2954 /* Failed to SLP. */
2955 /* Free the allocated memory. */
2956 vect_free_slp_tree (node);
2961 /* SLP failed for this instance, but it is still possible to SLP other stmts
2967 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2968 trees of packed scalar stmts if SLP is possible. */
2971 vect_analyze_slp (loop_vec_info loop_vinfo)
2974 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
2977 if (vect_print_dump_info (REPORT_SLP))
2978 fprintf (vect_dump, "=== vect_analyze_slp ===");
2980 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
2981 if (!vect_analyze_slp_instance (loop_vinfo, store))
2983 /* SLP failed. No instance can be SLPed in the loop. */
2984 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2985 fprintf (vect_dump, "SLP failed.");
2994 /* For each possible SLP instance decide whether to SLP it and calculate overall
2995 unrolling factor needed to SLP the loop. */
2998 vect_make_slp_decision (loop_vec_info loop_vinfo)
3000 unsigned int i, unrolling_factor = 1;
3001 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3002 slp_instance instance;
3003 int decided_to_slp = 0;
3005 if (vect_print_dump_info (REPORT_SLP))
3006 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3008 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3010 /* FORNOW: SLP if you can. */
3011 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3012 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3014 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3015 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3016 loop-based vectorization. Such stmts will be marked as HYBRID. */
3017 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3021 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3023 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3024 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3025 decided_to_slp, unrolling_factor);
3029 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3030 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3033 vect_detect_hybrid_slp_stmts (slp_tree node)
3037 imm_use_iterator imm_iter;
3043 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3044 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3045 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3046 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3047 if (vinfo_for_stmt (use_stmt)
3048 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3049 vect_mark_slp_stmts (node, hybrid, i);
3051 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3052 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3056 /* Find stmts that must be both vectorized and SLPed. */
3059 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3062 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3063 slp_instance instance;
3065 if (vect_print_dump_info (REPORT_SLP))
3066 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3068 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3069 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3073 /* Function vect_analyze_data_refs.
3075 Find all the data references in the loop.
3077 The general structure of the analysis of data refs in the vectorizer is as
3079 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3080 find and analyze all data-refs in the loop and their dependences.
3081 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3082 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3083 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3088 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3090 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3092 VEC (data_reference_p, heap) *datarefs;
3093 struct data_reference *dr;
3096 if (vect_print_dump_info (REPORT_DETAILS))
3097 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3099 compute_data_dependences_for_loop (loop, true,
3100 &LOOP_VINFO_DATAREFS (loop_vinfo),
3101 &LOOP_VINFO_DDRS (loop_vinfo));
3103 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3104 from stmt_vec_info struct to DR and vectype. */
3105 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3107 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3110 stmt_vec_info stmt_info;
3112 tree base, offset, init;
3114 if (!dr || !DR_REF (dr))
3116 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3117 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3121 stmt = DR_STMT (dr);
3122 stmt_info = vinfo_for_stmt (stmt);
3124 /* Check that analysis of the data-ref succeeded. */
3125 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3128 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3130 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3131 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3136 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3138 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3139 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3144 if (!DR_SYMBOL_TAG (dr))
3146 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3148 fprintf (vect_dump, "not vectorized: no memory tag for ");
3149 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3154 base = unshare_expr (DR_BASE_ADDRESS (dr));
3155 offset = unshare_expr (DR_OFFSET (dr));
3156 init = unshare_expr (DR_INIT (dr));
3158 /* Update DR field in stmt_vec_info struct. */
3159 bb = bb_for_stmt (stmt);
3161 /* If the dataref is in an inner-loop of the loop that is considered for
3162 for vectorization, we also want to analyze the access relative to
3163 the outer-loop (DR contains information only relative to the
3164 inner-most enclosing loop). We do that by building a reference to the
3165 first location accessed by the inner-loop, and analyze it relative to
3167 if (nested_in_vect_loop_p (loop, stmt))
3169 tree outer_step, outer_base, outer_init;
3170 HOST_WIDE_INT pbitsize, pbitpos;
3172 enum machine_mode pmode;
3173 int punsignedp, pvolatilep;
3174 affine_iv base_iv, offset_iv;
3177 /* Build a reference to the first location accessed by the
3178 inner-loop: *(BASE+INIT). (The first location is actually
3179 BASE+INIT+OFFSET, but we add OFFSET separately later. */
3180 tree inner_base = build_fold_indirect_ref
3181 (fold_build2 (PLUS_EXPR, TREE_TYPE (base), base, init));
3183 if (vect_print_dump_info (REPORT_DETAILS))
3185 fprintf (dump_file, "analyze in outer-loop: ");
3186 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3189 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3190 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3191 gcc_assert (outer_base != NULL_TREE);
3193 if (pbitpos % BITS_PER_UNIT != 0)
3195 if (vect_print_dump_info (REPORT_DETAILS))
3196 fprintf (dump_file, "failed: bit offset alignment.\n");
3200 outer_base = build_fold_addr_expr (outer_base);
3201 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3203 if (vect_print_dump_info (REPORT_DETAILS))
3204 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3211 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3218 offset_iv.base = ssize_int (0);
3219 offset_iv.step = ssize_int (0);
3221 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3223 if (vect_print_dump_info (REPORT_DETAILS))
3224 fprintf (dump_file, "evolution of offset is not affine.\n");
3228 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3229 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3230 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3231 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3232 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3234 outer_step = size_binop (PLUS_EXPR,
3235 fold_convert (ssizetype, base_iv.step),
3236 fold_convert (ssizetype, offset_iv.step));
3238 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3239 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3240 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3241 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3242 STMT_VINFO_DR_OFFSET (stmt_info) =
3243 fold_convert (ssizetype, offset_iv.base);
3244 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3245 size_int (highest_pow2_factor (offset_iv.base));
3247 if (dump_file && (dump_flags & TDF_DETAILS))
3249 fprintf (dump_file, "\touter base_address: ");
3250 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3251 fprintf (dump_file, "\n\touter offset from base address: ");
3252 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3253 fprintf (dump_file, "\n\touter constant offset from base address: ");
3254 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3255 fprintf (dump_file, "\n\touter step: ");
3256 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3257 fprintf (dump_file, "\n\touter aligned to: ");
3258 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3262 if (STMT_VINFO_DATA_REF (stmt_info))
3264 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3267 "not vectorized: more than one data ref in stmt: ");
3268 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3272 STMT_VINFO_DATA_REF (stmt_info) = dr;
3274 /* Set vectype for STMT. */
3275 scalar_type = TREE_TYPE (DR_REF (dr));
3276 STMT_VINFO_VECTYPE (stmt_info) =
3277 get_vectype_for_scalar_type (scalar_type);
3278 if (!STMT_VINFO_VECTYPE (stmt_info))
3280 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3283 "not vectorized: no vectype for stmt: ");
3284 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3285 fprintf (vect_dump, " scalar_type: ");
3286 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3296 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3298 /* Function vect_mark_relevant.
3300 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3303 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3304 enum vect_relevant relevant, bool live_p)
3306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3307 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3308 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3310 if (vect_print_dump_info (REPORT_DETAILS))
3311 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3313 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3317 /* This is the last stmt in a sequence that was detected as a
3318 pattern that can potentially be vectorized. Don't mark the stmt
3319 as relevant/live because it's not going to be vectorized.
3320 Instead mark the pattern-stmt that replaces it. */
3322 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3324 if (vect_print_dump_info (REPORT_DETAILS))
3325 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3326 stmt_info = vinfo_for_stmt (pattern_stmt);
3327 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3328 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3329 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3330 stmt = pattern_stmt;
3333 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3334 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3335 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3337 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3338 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3340 if (vect_print_dump_info (REPORT_DETAILS))
3341 fprintf (vect_dump, "already marked relevant/live.");
3345 VEC_safe_push (tree, heap, *worklist, stmt);
3349 /* Function vect_stmt_relevant_p.
3351 Return true if STMT in loop that is represented by LOOP_VINFO is
3352 "relevant for vectorization".
3354 A stmt is considered "relevant for vectorization" if:
3355 - it has uses outside the loop.
3356 - it has vdefs (it alters memory).
3357 - control stmts in the loop (except for the exit condition).
3359 CHECKME: what other side effects would the vectorizer allow? */
3362 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3363 enum vect_relevant *relevant, bool *live_p)
3365 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3366 ssa_op_iter op_iter;
3367 imm_use_iterator imm_iter;
3368 use_operand_p use_p;
3369 def_operand_p def_p;
3371 *relevant = vect_unused_in_loop;
3374 /* cond stmt other than loop exit cond. */
3375 if (is_ctrl_stmt (stmt)
3376 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3377 *relevant = vect_used_in_loop;
3379 /* changing memory. */
3380 if (TREE_CODE (stmt) != PHI_NODE)
3381 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3383 if (vect_print_dump_info (REPORT_DETAILS))
3384 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3385 *relevant = vect_used_in_loop;
3388 /* uses outside the loop. */
3389 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3391 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3393 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3394 if (!flow_bb_inside_loop_p (loop, bb))
3396 if (vect_print_dump_info (REPORT_DETAILS))
3397 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3399 /* We expect all such uses to be in the loop exit phis
3400 (because of loop closed form) */
3401 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3402 gcc_assert (bb == single_exit (loop)->dest);
3409 return (*live_p || *relevant);
3414 Function process_use.
3417 - a USE in STMT in a loop represented by LOOP_VINFO
3418 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3419 that defined USE. This is dont by calling mark_relevant and passing it
3420 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
3423 Generally, LIVE_P and RELEVANT are used to define the liveness and
3424 relevance info of the DEF_STMT of this USE:
3425 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3426 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3428 - case 1: If USE is used only for address computations (e.g. array indexing),
3429 which does not need to be directly vectorized, then the liveness/relevance
3430 of the respective DEF_STMT is left unchanged.
3431 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3432 skip DEF_STMT cause it had already been processed.
3433 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3434 be modified accordingly.
3436 Return true if everything is as expected. Return false otherwise. */
3439 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3440 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3442 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3443 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3444 stmt_vec_info dstmt_vinfo;
3445 basic_block bb, def_bb;
3447 enum vect_def_type dt;
3449 /* case 1: we are only interested in uses that need to be vectorized. Uses
3450 that are used for address computation are not considered relevant. */
3451 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3454 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3456 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3457 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3461 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3464 def_bb = bb_for_stmt (def_stmt);
3465 if (!flow_bb_inside_loop_p (loop, def_bb))
3467 if (vect_print_dump_info (REPORT_DETAILS))
3468 fprintf (vect_dump, "def_stmt is out of loop.");
3472 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3473 DEF_STMT must have already been processed, because this should be the
3474 only way that STMT, which is a reduction-phi, was put in the worklist,
3475 as there should be no other uses for DEF_STMT in the loop. So we just
3476 check that everything is as expected, and we are done. */
3477 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3478 bb = bb_for_stmt (stmt);
3479 if (TREE_CODE (stmt) == PHI_NODE
3480 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3481 && TREE_CODE (def_stmt) != PHI_NODE
3482 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3483 && bb->loop_father == def_bb->loop_father)
3485 if (vect_print_dump_info (REPORT_DETAILS))
3486 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3487 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3488 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3489 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3490 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3491 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3495 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3496 outer-loop-header-bb:
3502 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3504 if (vect_print_dump_info (REPORT_DETAILS))
3505 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3508 case vect_unused_in_loop:
3509 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3510 vect_used_by_reduction : vect_unused_in_loop;
3512 case vect_used_in_outer_by_reduction:
3513 relevant = vect_used_by_reduction;
3515 case vect_used_in_outer:
3516 relevant = vect_used_in_loop;
3518 case vect_used_by_reduction:
3519 case vect_used_in_loop:
3527 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3528 outer-loop-header-bb:
3534 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3536 if (vect_print_dump_info (REPORT_DETAILS))
3537 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3540 case vect_unused_in_loop:
3541 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3542 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3545 case vect_used_in_outer_by_reduction:
3546 case vect_used_in_outer:
3549 case vect_used_by_reduction:
3550 relevant = vect_used_in_outer_by_reduction;
3553 case vect_used_in_loop:
3554 relevant = vect_used_in_outer;
3562 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3567 /* Function vect_mark_stmts_to_be_vectorized.
3569 Not all stmts in the loop need to be vectorized. For example:
3578 Stmt 1 and 3 do not need to be vectorized, because loop control and
3579 addressing of vectorized data-refs are handled differently.
3581 This pass detects such stmts. */
3584 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3586 VEC(tree,heap) *worklist;
3587 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3588 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3589 unsigned int nbbs = loop->num_nodes;
3590 block_stmt_iterator si;
3594 stmt_vec_info stmt_vinfo;
3598 enum vect_relevant relevant;
3600 if (vect_print_dump_info (REPORT_DETAILS))
3601 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3603 worklist = VEC_alloc (tree, heap, 64);
3605 /* 1. Init worklist. */
3606 for (i = 0; i < nbbs; i++)
3609 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3611 if (vect_print_dump_info (REPORT_DETAILS))
3613 fprintf (vect_dump, "init: phi relevant? ");
3614 print_generic_expr (vect_dump, phi, TDF_SLIM);
3617 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3618 vect_mark_relevant (&worklist, phi, relevant, live_p);
3620 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3622 stmt = bsi_stmt (si);
3623 if (vect_print_dump_info (REPORT_DETAILS))
3625 fprintf (vect_dump, "init: stmt relevant? ");
3626 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3629 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3630 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3634 /* 2. Process_worklist */
3635 while (VEC_length (tree, worklist) > 0)
3637 use_operand_p use_p;
3640 stmt = VEC_pop (tree, worklist);
3641 if (vect_print_dump_info (REPORT_DETAILS))
3643 fprintf (vect_dump, "worklist: examine stmt: ");
3644 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3647 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3648 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3649 liveness and relevance properties of STMT. */
3650 ann = stmt_ann (stmt);
3651 stmt_vinfo = vinfo_for_stmt (stmt);
3652 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3653 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3655 /* Generally, the liveness and relevance properties of STMT are
3656 propagated as is to the DEF_STMTs of its USEs:
3657 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3658 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3660 One exception is when STMT has been identified as defining a reduction
3661 variable; in this case we set the liveness/relevance as follows:
3663 relevant = vect_used_by_reduction
3664 This is because we distinguish between two kinds of relevant stmts -
3665 those that are used by a reduction computation, and those that are
3666 (also) used by a regular computation. This allows us later on to
3667 identify stmts that are used solely by a reduction, and therefore the
3668 order of the results that they produce does not have to be kept.
3670 Reduction phis are expected to be used by a reduction stmt, or by
3671 in an outer loop; Other reduction stmts are expected to be
3672 in the loop, and possibly used by a stmt in an outer loop.
3673 Here are the expected values of "relevant" for reduction phis/stmts:
3676 vect_unused_in_loop ok
3677 vect_used_in_outer_by_reduction ok ok
3678 vect_used_in_outer ok ok
3679 vect_used_by_reduction ok
3680 vect_used_in_loop */
3682 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3684 enum vect_relevant tmp_relevant = relevant;
3685 switch (tmp_relevant)
3687 case vect_unused_in_loop:
3688 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3689 relevant = vect_used_by_reduction;
3692 case vect_used_in_outer_by_reduction:
3693 case vect_used_in_outer:
3694 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3695 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3698 case vect_used_by_reduction:
3699 if (TREE_CODE (stmt) == PHI_NODE)
3702 case vect_used_in_loop:
3704 if (vect_print_dump_info (REPORT_DETAILS))
3705 fprintf (vect_dump, "unsupported use of reduction.");
3706 VEC_free (tree, heap, worklist);
3712 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3714 tree op = USE_FROM_PTR (use_p);
3715 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3717 VEC_free (tree, heap, worklist);
3721 } /* while worklist */
3723 VEC_free (tree, heap, worklist);
3728 /* Function vect_can_advance_ivs_p
3730 In case the number of iterations that LOOP iterates is unknown at compile
3731 time, an epilog loop will be generated, and the loop induction variables
3732 (IVs) will be "advanced" to the value they are supposed to take just before
3733 the epilog loop. Here we check that the access function of the loop IVs
3734 and the expression that represents the loop bound are simple enough.
3735 These restrictions will be relaxed in the future. */
3738 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3740 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3741 basic_block bb = loop->header;
3744 /* Analyze phi functions of the loop header. */
3746 if (vect_print_dump_info (REPORT_DETAILS))
3747 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3749 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3751 tree access_fn = NULL;
3752 tree evolution_part;
3754 if (vect_print_dump_info (REPORT_DETAILS))
3756 fprintf (vect_dump, "Analyze phi: ");
3757 print_generic_expr (vect_dump, phi, TDF_SLIM);
3760 /* Skip virtual phi's. The data dependences that are associated with
3761 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3763 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3765 if (vect_print_dump_info (REPORT_DETAILS))
3766 fprintf (vect_dump, "virtual phi. skip.");
3770 /* Skip reduction phis. */
3772 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3774 if (vect_print_dump_info (REPORT_DETAILS))
3775 fprintf (vect_dump, "reduc phi. skip.");
3779 /* Analyze the evolution function. */
3781 access_fn = instantiate_parameters
3782 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3786 if (vect_print_dump_info (REPORT_DETAILS))
3787 fprintf (vect_dump, "No Access function.");
3791 if (vect_print_dump_info (REPORT_DETAILS))
3793 fprintf (vect_dump, "Access function of PHI: ");
3794 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3797 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3799 if (evolution_part == NULL_TREE)
3801 if (vect_print_dump_info (REPORT_DETAILS))
3802 fprintf (vect_dump, "No evolution.");
3806 /* FORNOW: We do not transform initial conditions of IVs
3807 which evolution functions are a polynomial of degree >= 2. */
3809 if (tree_is_chrec (evolution_part))
3817 /* Function vect_get_loop_niters.
3819 Determine how many iterations the loop is executed.
3820 If an expression that represents the number of iterations
3821 can be constructed, place it in NUMBER_OF_ITERATIONS.
3822 Return the loop exit condition. */
3825 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3829 if (vect_print_dump_info (REPORT_DETAILS))
3830 fprintf (vect_dump, "=== get_loop_niters ===");
3832 niters = number_of_exit_cond_executions (loop);
3834 if (niters != NULL_TREE
3835 && niters != chrec_dont_know)
3837 *number_of_iterations = niters;
3839 if (vect_print_dump_info (REPORT_DETAILS))
3841 fprintf (vect_dump, "==> get_loop_niters:" );
3842 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3846 return get_loop_exit_condition (loop);
3850 /* Function vect_analyze_loop_1.
3852 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3853 for it. The different analyses will record information in the
3854 loop_vec_info struct. This is a subset of the analyses applied in
3855 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3856 that is now considered for (outer-loop) vectorization. */
3858 static loop_vec_info
3859 vect_analyze_loop_1 (struct loop *loop)
3861 loop_vec_info loop_vinfo;
3863 if (vect_print_dump_info (REPORT_DETAILS))
3864 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3866 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3868 loop_vinfo = vect_analyze_loop_form (loop);
3871 if (vect_print_dump_info (REPORT_DETAILS))
3872 fprintf (vect_dump, "bad inner-loop form.");
3880 /* Function vect_analyze_loop_form.
3882 Verify that certain CFG restrictions hold, including:
3883 - the loop has a pre-header
3884 - the loop has a single entry and exit
3885 - the loop exit condition is simple enough, and the number of iterations
3886 can be analyzed (a countable loop). */
3888 static loop_vec_info
3889 vect_analyze_loop_form (struct loop *loop)
3891 loop_vec_info loop_vinfo;
3893 tree number_of_iterations = NULL;
3894 loop_vec_info inner_loop_vinfo = NULL;
3896 if (vect_print_dump_info (REPORT_DETAILS))
3897 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
3899 /* Different restrictions apply when we are considering an inner-most loop,
3900 vs. an outer (nested) loop.
3901 (FORNOW. May want to relax some of these restrictions in the future). */
3905 /* Inner-most loop. We currently require that the number of BBs is
3906 exactly 2 (the header and latch). Vectorizable inner-most loops
3917 if (loop->num_nodes != 2)
3919 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3920 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
3924 if (empty_block_p (loop->header))
3926 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3927 fprintf (vect_dump, "not vectorized: empty loop.");
3933 struct loop *innerloop = loop->inner;
3934 edge backedge, entryedge;
3936 /* Nested loop. We currently require that the loop is doubly-nested,
3937 contains a single inner loop, and the number of BBs is exactly 5.
3938 Vectorizable outer-loops look like this:
3950 The inner-loop has the properties expected of inner-most loops
3951 as described above. */
3953 if ((loop->inner)->inner || (loop->inner)->next)
3955 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3956 fprintf (vect_dump, "not vectorized: multiple nested loops.");
3960 /* Analyze the inner-loop. */
3961 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
3962 if (!inner_loop_vinfo)
3964 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3965 fprintf (vect_dump, "not vectorized: Bad inner loop.");
3969 if (!expr_invariant_in_loop_p (loop,
3970 LOOP_VINFO_NITERS (inner_loop_vinfo)))
3972 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3974 "not vectorized: inner-loop count not invariant.");
3975 destroy_loop_vec_info (inner_loop_vinfo, true);
3979 if (loop->num_nodes != 5)
3981 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3982 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
3983 destroy_loop_vec_info (inner_loop_vinfo, true);
3987 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
3988 backedge = EDGE_PRED (innerloop->header, 1);
3989 entryedge = EDGE_PRED (innerloop->header, 0);
3990 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
3992 backedge = EDGE_PRED (innerloop->header, 0);
3993 entryedge = EDGE_PRED (innerloop->header, 1);
3996 if (entryedge->src != loop->header
3997 || !single_exit (innerloop)
3998 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4000 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4001 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4002 destroy_loop_vec_info (inner_loop_vinfo, true);
4006 if (vect_print_dump_info (REPORT_DETAILS))
4007 fprintf (vect_dump, "Considering outer-loop vectorization.");
4010 if (!single_exit (loop)
4011 || EDGE_COUNT (loop->header->preds) != 2)
4013 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4015 if (!single_exit (loop))
4016 fprintf (vect_dump, "not vectorized: multiple exits.");
4017 else if (EDGE_COUNT (loop->header->preds) != 2)
4018 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4020 if (inner_loop_vinfo)
4021 destroy_loop_vec_info (inner_loop_vinfo, true);
4025 /* We assume that the loop exit condition is at the end of the loop. i.e,
4026 that the loop is represented as a do-while (with a proper if-guard
4027 before the loop if needed), where the loop header contains all the
4028 executable statements, and the latch is empty. */
4029 if (!empty_block_p (loop->latch)
4030 || phi_nodes (loop->latch))
4032 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4033 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4034 if (inner_loop_vinfo)
4035 destroy_loop_vec_info (inner_loop_vinfo, true);
4039 /* Make sure there exists a single-predecessor exit bb: */
4040 if (!single_pred_p (single_exit (loop)->dest))
4042 edge e = single_exit (loop);
4043 if (!(e->flags & EDGE_ABNORMAL))
4045 split_loop_exit_edge (e);
4046 if (vect_print_dump_info (REPORT_DETAILS))
4047 fprintf (vect_dump, "split exit edge.");
4051 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4052 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4053 if (inner_loop_vinfo)
4054 destroy_loop_vec_info (inner_loop_vinfo, true);
4059 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4062 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4063 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4064 if (inner_loop_vinfo)
4065 destroy_loop_vec_info (inner_loop_vinfo, true);
4069 if (!number_of_iterations)
4071 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4073 "not vectorized: number of iterations cannot be computed.");
4074 if (inner_loop_vinfo)
4075 destroy_loop_vec_info (inner_loop_vinfo, true);
4079 if (chrec_contains_undetermined (number_of_iterations))
4081 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4082 fprintf (vect_dump, "Infinite number of iterations.");
4083 if (inner_loop_vinfo)
4084 destroy_loop_vec_info (inner_loop_vinfo, true);
4088 if (!NITERS_KNOWN_P (number_of_iterations))
4090 if (vect_print_dump_info (REPORT_DETAILS))
4092 fprintf (vect_dump, "Symbolic number of iterations is ");
4093 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4096 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4098 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4099 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4100 if (inner_loop_vinfo)
4101 destroy_loop_vec_info (inner_loop_vinfo, false);
4105 loop_vinfo = new_loop_vec_info (loop);
4106 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4108 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4110 /* CHECKME: May want to keep it around it in the future. */
4111 if (inner_loop_vinfo)
4112 destroy_loop_vec_info (inner_loop_vinfo, false);
4114 gcc_assert (!loop->aux);
4115 loop->aux = loop_vinfo;
4120 /* Function vect_analyze_loop.
4122 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4123 for it. The different analyses will record information in the
4124 loop_vec_info struct. */
4126 vect_analyze_loop (struct loop *loop)
4129 loop_vec_info loop_vinfo;
4131 if (vect_print_dump_info (REPORT_DETAILS))
4132 fprintf (vect_dump, "===== analyze_loop_nest =====");
4134 if (loop_outer (loop)
4135 && loop_vec_info_for_loop (loop_outer (loop))
4136 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4138 if (vect_print_dump_info (REPORT_DETAILS))
4139 fprintf (vect_dump, "outer-loop already vectorized.");
4143 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4145 loop_vinfo = vect_analyze_loop_form (loop);
4148 if (vect_print_dump_info (REPORT_DETAILS))
4149 fprintf (vect_dump, "bad loop form.");
4153 /* Find all data references in the loop (which correspond to vdefs/vuses)
4154 and analyze their evolution in the loop.
4156 FORNOW: Handle only simple, array references, which
4157 alignment can be forced, and aligned pointer-references. */
4159 ok = vect_analyze_data_refs (loop_vinfo);
4162 if (vect_print_dump_info (REPORT_DETAILS))
4163 fprintf (vect_dump, "bad data references.");
4164 destroy_loop_vec_info (loop_vinfo, true);
4168 /* Classify all cross-iteration scalar data-flow cycles.
4169 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4171 vect_analyze_scalar_cycles (loop_vinfo);
4173 vect_pattern_recog (loop_vinfo);
4175 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4177 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4180 if (vect_print_dump_info (REPORT_DETAILS))
4181 fprintf (vect_dump, "unexpected pattern.");
4182 destroy_loop_vec_info (loop_vinfo, true);
4186 /* Analyze the alignment of the data-refs in the loop.
4187 Fail if a data reference is found that cannot be vectorized. */
4189 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4192 if (vect_print_dump_info (REPORT_DETAILS))
4193 fprintf (vect_dump, "bad data alignment.");
4194 destroy_loop_vec_info (loop_vinfo, true);
4198 ok = vect_determine_vectorization_factor (loop_vinfo);
4201 if (vect_print_dump_info (REPORT_DETAILS))
4202 fprintf (vect_dump, "can't determine vectorization factor.");
4203 destroy_loop_vec_info (loop_vinfo, true);
4207 /* Analyze data dependences between the data-refs in the loop.
4208 FORNOW: fail at the first data dependence that we encounter. */
4210 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4213 if (vect_print_dump_info (REPORT_DETAILS))
4214 fprintf (vect_dump, "bad data dependence.");
4215 destroy_loop_vec_info (loop_vinfo, true);
4219 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4220 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4222 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4225 if (vect_print_dump_info (REPORT_DETAILS))
4226 fprintf (vect_dump, "bad data access.");
4227 destroy_loop_vec_info (loop_vinfo, true);
4231 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4232 ok = vect_analyze_slp (loop_vinfo);
4235 /* Decide which possible SLP instances to SLP. */
4236 vect_make_slp_decision (loop_vinfo);
4238 /* Find stmts that need to be both vectorized and SLPed. */
4239 vect_detect_hybrid_slp (loop_vinfo);
4242 /* This pass will decide on using loop versioning and/or loop peeling in
4243 order to enhance the alignment of data references in the loop. */
4245 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4248 if (vect_print_dump_info (REPORT_DETAILS))
4249 fprintf (vect_dump, "bad data alignment.");
4250 destroy_loop_vec_info (loop_vinfo, true);
4254 /* Scan all the operations in the loop and make sure they are
4257 ok = vect_analyze_operations (loop_vinfo);
4260 if (vect_print_dump_info (REPORT_DETAILS))
4261 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4262 destroy_loop_vec_info (loop_vinfo, true);
4266 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;