1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
104 stmt_vec_info stmt_info;
107 if (vect_print_dump_info (REPORT_DETAILS))
108 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
110 for (i = 0; i < nbbs; i++)
112 basic_block bb = bbs[i];
114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
116 stmt_info = vinfo_for_stmt (phi);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining phi: ");
120 print_generic_expr (vect_dump, phi, TDF_SLIM);
123 gcc_assert (stmt_info);
125 if (STMT_VINFO_RELEVANT_P (stmt_info))
127 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
128 scalar_type = TREE_TYPE (PHI_RESULT (phi));
130 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "get vectype for scalar type: ");
133 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
136 vectype = get_vectype_for_scalar_type (scalar_type);
139 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 "not vectorized: unsupported data-type ");
143 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
147 STMT_VINFO_VECTYPE (stmt_info) = vectype;
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "vectype: ");
152 print_generic_expr (vect_dump, vectype, TDF_SLIM);
155 nunits = TYPE_VECTOR_SUBPARTS (vectype);
156 if (vect_print_dump_info (REPORT_DETAILS))
157 fprintf (vect_dump, "nunits = %d", nunits);
159 if (!vectorization_factor
160 || (nunits > vectorization_factor))
161 vectorization_factor = nunits;
165 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
167 tree stmt = bsi_stmt (si);
168 stmt_info = vinfo_for_stmt (stmt);
170 if (vect_print_dump_info (REPORT_DETAILS))
172 fprintf (vect_dump, "==> examining statement: ");
173 print_generic_expr (vect_dump, stmt, TDF_SLIM);
176 gcc_assert (stmt_info);
178 /* skip stmts which do not need to be vectorized. */
179 if (!STMT_VINFO_RELEVANT_P (stmt_info)
180 && !STMT_VINFO_LIVE_P (stmt_info))
182 if (vect_print_dump_info (REPORT_DETAILS))
183 fprintf (vect_dump, "skip.");
187 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
189 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
191 fprintf (vect_dump, "not vectorized: irregular stmt.");
192 print_generic_expr (vect_dump, stmt, TDF_SLIM);
197 if (!GIMPLE_STMT_P (stmt)
198 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
200 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
202 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
203 print_generic_expr (vect_dump, stmt, TDF_SLIM);
208 if (STMT_VINFO_VECTYPE (stmt_info))
210 /* The only case when a vectype had been already set is for stmts
211 that contain a dataref, or for "pattern-stmts" (stmts generated
212 by the vectorizer to represent/replace a certain idiom). */
213 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
214 || is_pattern_stmt_p (stmt_info));
215 vectype = STMT_VINFO_VECTYPE (stmt_info);
219 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
220 && !is_pattern_stmt_p (stmt_info));
222 /* We set the vectype according to the type of the result (lhs).
223 For stmts whose result-type is different than the type of the
224 arguments (e.g. demotion, promotion), vectype will be reset
225 appropriately (later). Note that we have to visit the smallest
226 datatype in this function, because that determines the VF.
227 If the smallest datatype in the loop is present only as the
228 rhs of a promotion operation - we'd miss it here.
229 However, in such a case, that a variable of this datatype
230 does not appear in the lhs anywhere in the loop, it shouldn't
231 affect the vectorization factor. */
232 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
234 if (vect_print_dump_info (REPORT_DETAILS))
236 fprintf (vect_dump, "get vectype for scalar type: ");
237 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
240 vectype = get_vectype_for_scalar_type (scalar_type);
243 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
246 "not vectorized: unsupported data-type ");
247 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
251 STMT_VINFO_VECTYPE (stmt_info) = vectype;
254 if (vect_print_dump_info (REPORT_DETAILS))
256 fprintf (vect_dump, "vectype: ");
257 print_generic_expr (vect_dump, vectype, TDF_SLIM);
260 nunits = TYPE_VECTOR_SUBPARTS (vectype);
261 if (vect_print_dump_info (REPORT_DETAILS))
262 fprintf (vect_dump, "nunits = %d", nunits);
264 if (!vectorization_factor
265 || (nunits > vectorization_factor))
266 vectorization_factor = nunits;
271 /* TODO: Analyze cost. Decide if worth while to vectorize. */
272 if (vect_print_dump_info (REPORT_DETAILS))
273 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
274 if (vectorization_factor <= 1)
276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
277 fprintf (vect_dump, "not vectorized: unsupported data-type");
280 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
286 /* Function vect_analyze_operations.
288 Scan the loop stmts and make sure they are all vectorizable. */
291 vect_analyze_operations (loop_vec_info loop_vinfo)
293 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
294 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
295 int nbbs = loop->num_nodes;
296 block_stmt_iterator si;
297 unsigned int vectorization_factor = 0;
301 stmt_vec_info stmt_info;
302 bool need_to_vectorize = false;
303 int min_profitable_iters;
304 int min_scalar_loop_bound;
307 if (vect_print_dump_info (REPORT_DETAILS))
308 fprintf (vect_dump, "=== vect_analyze_operations ===");
310 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
311 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
313 for (i = 0; i < nbbs; i++)
315 basic_block bb = bbs[i];
317 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
321 stmt_info = vinfo_for_stmt (phi);
322 if (vect_print_dump_info (REPORT_DETAILS))
324 fprintf (vect_dump, "examining phi: ");
325 print_generic_expr (vect_dump, phi, TDF_SLIM);
328 if (! is_loop_header_bb_p (bb))
330 /* inner-loop loop-closed exit phi in outer-loop vectorization
331 (i.e. a phi in the tail of the outer-loop).
332 FORNOW: we currently don't support the case that these phis
333 are not used in the outerloop, cause this case requires
334 to actually do something here. */
335 if (!STMT_VINFO_RELEVANT_P (stmt_info)
336 || STMT_VINFO_LIVE_P (stmt_info))
338 if (vect_print_dump_info (REPORT_DETAILS))
340 "Unsupported loop-closed phi in outer-loop.");
346 gcc_assert (stmt_info);
348 if (STMT_VINFO_LIVE_P (stmt_info))
350 /* FORNOW: not yet supported. */
351 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
352 fprintf (vect_dump, "not vectorized: value used after loop.");
356 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
357 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
359 /* A scalar-dependence cycle that we don't support. */
360 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
361 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
365 if (STMT_VINFO_RELEVANT_P (stmt_info))
367 need_to_vectorize = true;
368 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
369 ok = vectorizable_induction (phi, NULL, NULL);
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
377 "not vectorized: relevant phi not supported: ");
378 print_generic_expr (vect_dump, phi, TDF_SLIM);
384 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
386 tree stmt = bsi_stmt (si);
387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
388 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
390 if (vect_print_dump_info (REPORT_DETAILS))
392 fprintf (vect_dump, "==> examining statement: ");
393 print_generic_expr (vect_dump, stmt, TDF_SLIM);
396 gcc_assert (stmt_info);
398 /* skip stmts which do not need to be vectorized.
399 this is expected to include:
400 - the COND_EXPR which is the loop exit condition
401 - any LABEL_EXPRs in the loop
402 - computations that are used only for array indexing or loop
405 if (!STMT_VINFO_RELEVANT_P (stmt_info)
406 && !STMT_VINFO_LIVE_P (stmt_info))
408 if (vect_print_dump_info (REPORT_DETAILS))
409 fprintf (vect_dump, "irrelevant.");
413 switch (STMT_VINFO_DEF_TYPE (stmt_info))
418 case vect_reduction_def:
419 gcc_assert (relevance == vect_used_in_outer
420 || relevance == vect_used_in_outer_by_reduction
421 || relevance == vect_unused_in_loop);
424 case vect_induction_def:
425 case vect_constant_def:
426 case vect_invariant_def:
427 case vect_unknown_def_type:
432 if (STMT_VINFO_RELEVANT_P (stmt_info))
434 gcc_assert (GIMPLE_STMT_P (stmt)
435 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
436 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
437 need_to_vectorize = true;
440 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
441 || vectorizable_type_demotion (stmt, NULL, NULL)
442 || vectorizable_conversion (stmt, NULL, NULL)
443 || vectorizable_operation (stmt, NULL, NULL)
444 || vectorizable_assignment (stmt, NULL, NULL)
445 || vectorizable_load (stmt, NULL, NULL)
446 || vectorizable_call (stmt, NULL, NULL)
447 || vectorizable_store (stmt, NULL, NULL)
448 || vectorizable_condition (stmt, NULL, NULL)
449 || vectorizable_reduction (stmt, NULL, NULL));
451 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
452 need extra handling, except for vectorizable reductions. */
453 if (STMT_VINFO_LIVE_P (stmt_info)
454 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
455 ok |= vectorizable_live_operation (stmt, NULL, NULL);
459 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
461 fprintf (vect_dump, "not vectorized: stmt not supported: ");
462 print_generic_expr (vect_dump, stmt, TDF_SLIM);
469 /* All operations in the loop are either irrelevant (deal with loop
470 control, or dead), or only used outside the loop and can be moved
471 out of the loop (e.g. invariants, inductions). The loop can be
472 optimized away by scalar optimizations. We're better off not
473 touching this loop. */
474 if (!need_to_vectorize)
476 if (vect_print_dump_info (REPORT_DETAILS))
478 "All the computation can be taken out of the loop.");
479 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
481 "not vectorized: redundant loop. no profit to vectorize.");
485 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
486 && vect_print_dump_info (REPORT_DETAILS))
488 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
489 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
491 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
492 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
494 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
495 fprintf (vect_dump, "not vectorized: iteration count too small.");
496 if (vect_print_dump_info (REPORT_DETAILS))
497 fprintf (vect_dump,"not vectorized: iteration count smaller than "
498 "vectorization factor.");
502 /* Analyze cost. Decide if worth while to vectorize. */
504 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
505 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
506 if (min_profitable_iters < 0)
508 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
509 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
510 if (vect_print_dump_info (REPORT_DETAILS))
511 fprintf (vect_dump, "not vectorized: vector version will never be "
516 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
517 * vectorization_factor;
519 /* Use the cost model only if it is more conservative than user specified
522 th = (unsigned) min_scalar_loop_bound;
523 if (min_profitable_iters
524 && (!min_scalar_loop_bound
525 || min_profitable_iters > min_scalar_loop_bound))
526 th = (unsigned) min_profitable_iters;
528 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
529 && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
531 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
532 fprintf (vect_dump, "not vectorized: vectorization not "
534 if (vect_print_dump_info (REPORT_DETAILS))
535 fprintf (vect_dump, "not vectorized: iteration count smaller than "
536 "user specified loop bound parameter or minimum "
537 "profitable iterations (whichever is more conservative).");
541 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
542 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
543 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
545 if (vect_print_dump_info (REPORT_DETAILS))
546 fprintf (vect_dump, "epilog loop required.");
547 if (!vect_can_advance_ivs_p (loop_vinfo))
549 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
551 "not vectorized: can't create epilog loop 1.");
554 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
556 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
558 "not vectorized: can't create epilog loop 2.");
567 /* Function exist_non_indexing_operands_for_use_p
569 USE is one of the uses attached to STMT. Check if USE is
570 used in STMT for anything other than indexing an array. */
573 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
576 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
578 /* USE corresponds to some operand in STMT. If there is no data
579 reference in STMT, then any operand that corresponds to USE
580 is not indexing an array. */
581 if (!STMT_VINFO_DATA_REF (stmt_info))
584 /* STMT has a data_ref. FORNOW this means that its of one of
588 (This should have been verified in analyze_data_refs).
590 'var' in the second case corresponds to a def, not a use,
591 so USE cannot correspond to any operands that are not used
594 Therefore, all we need to check is if STMT falls into the
595 first case, and whether var corresponds to USE. */
597 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
600 operand = GIMPLE_STMT_OPERAND (stmt, 1);
602 if (TREE_CODE (operand) != SSA_NAME)
612 /* Function vect_analyze_scalar_cycles_1.
614 Examine the cross iteration def-use cycles of scalar variables
615 in LOOP. LOOP_VINFO represents the loop that is noe being
616 considered for vectorization (can be LOOP, or an outer-loop
620 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
623 basic_block bb = loop->header;
625 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
627 if (vect_print_dump_info (REPORT_DETAILS))
628 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
630 /* First - identify all inductions. */
631 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
633 tree access_fn = NULL;
634 tree def = PHI_RESULT (phi);
635 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
637 if (vect_print_dump_info (REPORT_DETAILS))
639 fprintf (vect_dump, "Analyze phi: ");
640 print_generic_expr (vect_dump, phi, TDF_SLIM);
643 /* Skip virtual phi's. The data dependences that are associated with
644 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
645 if (!is_gimple_reg (SSA_NAME_VAR (def)))
648 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
650 /* Analyze the evolution function. */
651 access_fn = analyze_scalar_evolution (loop, def);
652 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
654 fprintf (vect_dump, "Access function of PHI: ");
655 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
659 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
661 VEC_safe_push (tree, heap, worklist, phi);
665 if (vect_print_dump_info (REPORT_DETAILS))
666 fprintf (vect_dump, "Detected induction.");
667 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
671 /* Second - identify all reductions. */
672 while (VEC_length (tree, worklist) > 0)
674 tree phi = VEC_pop (tree, worklist);
675 tree def = PHI_RESULT (phi);
676 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
679 if (vect_print_dump_info (REPORT_DETAILS))
681 fprintf (vect_dump, "Analyze phi: ");
682 print_generic_expr (vect_dump, phi, TDF_SLIM);
685 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
686 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
688 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
691 if (vect_print_dump_info (REPORT_DETAILS))
692 fprintf (vect_dump, "Detected reduction.");
693 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
694 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
698 if (vect_print_dump_info (REPORT_DETAILS))
699 fprintf (vect_dump, "Unknown def-use cycle pattern.");
702 VEC_free (tree, heap, worklist);
707 /* Function vect_analyze_scalar_cycles.
709 Examine the cross iteration def-use cycles of scalar variables, by
710 analyzing the loop-header PHIs of scalar variables; Classify each
711 cycle as one of the following: invariant, induction, reduction, unknown.
712 We do that for the loop represented by LOOP_VINFO, and also to its
713 inner-loop, if exists.
714 Examples for scalar cycles:
729 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
731 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
733 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
735 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
736 Reductions in such inner-loop therefore have different properties than
737 the reductions in the nest that gets vectorized:
738 1. When vectorized, they are executed in the same order as in the original
739 scalar loop, so we can't change the order of computation when
741 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
742 current checks are too strict. */
745 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
749 /* Function vect_insert_into_interleaving_chain.
751 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
754 vect_insert_into_interleaving_chain (struct data_reference *dra,
755 struct data_reference *drb)
757 tree prev, next, next_init;
758 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
759 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
761 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
762 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
765 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
766 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
769 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
770 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
774 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
777 /* We got to the end of the list. Insert here. */
778 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
779 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
783 /* Function vect_update_interleaving_chain.
785 For two data-refs DRA and DRB that are a part of a chain interleaved data
786 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
788 There are four possible cases:
789 1. New stmts - both DRA and DRB are not a part of any chain:
792 2. DRB is a part of a chain and DRA is not:
793 no need to update FIRST_DR
794 no need to insert DRB
795 insert DRA according to init
796 3. DRA is a part of a chain and DRB is not:
797 if (init of FIRST_DR > init of DRB)
799 NEXT(FIRST_DR) = previous FIRST_DR
801 insert DRB according to its init
802 4. both DRA and DRB are in some interleaving chains:
803 choose the chain with the smallest init of FIRST_DR
804 insert the nodes of the second chain into the first one. */
807 vect_update_interleaving_chain (struct data_reference *drb,
808 struct data_reference *dra)
810 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
811 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
812 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
813 tree node, prev, next, node_init, first_stmt;
815 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
816 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
818 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
819 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
820 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
824 /* 2. DRB is a part of a chain and DRA is not. */
825 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
827 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
828 /* Insert DRA into the chain of DRB. */
829 vect_insert_into_interleaving_chain (dra, drb);
833 /* 3. DRA is a part of a chain and DRB is not. */
834 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
836 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
837 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
841 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
843 /* DRB's init is smaller than the init of the stmt previously marked
844 as the first stmt of the interleaving chain of DRA. Therefore, we
845 update FIRST_STMT and put DRB in the head of the list. */
846 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
847 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
849 /* Update all the stmts in the list to point to the new FIRST_STMT. */
850 tmp = old_first_stmt;
853 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
854 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
859 /* Insert DRB in the list of DRA. */
860 vect_insert_into_interleaving_chain (drb, dra);
861 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
866 /* 4. both DRA and DRB are in some interleaving chains. */
867 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
868 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
869 if (first_a == first_b)
871 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
872 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
874 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
876 /* Insert the nodes of DRA chain into the DRB chain.
877 After inserting a node, continue from this node of the DRB chain (don't
878 start from the beginning. */
879 node = DR_GROUP_FIRST_DR (stmtinfo_a);
880 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
881 first_stmt = first_b;
885 /* Insert the nodes of DRB chain into the DRA chain.
886 After inserting a node, continue from this node of the DRA chain (don't
887 start from the beginning. */
888 node = DR_GROUP_FIRST_DR (stmtinfo_b);
889 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
890 first_stmt = first_a;
895 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
896 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
899 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
900 if (tree_int_cst_compare (next_init, node_init) > 0)
903 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
904 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
909 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
913 /* We got to the end of the list. Insert here. */
914 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
915 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
918 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
919 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
924 /* Function vect_equal_offsets.
926 Check if OFFSET1 and OFFSET2 are identical expressions. */
929 vect_equal_offsets (tree offset1, tree offset2)
933 STRIP_NOPS (offset1);
934 STRIP_NOPS (offset2);
936 if (offset1 == offset2)
939 if (TREE_CODE (offset1) != TREE_CODE (offset2)
940 || !BINARY_CLASS_P (offset1)
941 || !BINARY_CLASS_P (offset2))
944 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
945 TREE_OPERAND (offset2, 0));
946 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
947 TREE_OPERAND (offset2, 1));
949 return (res0 && res1);
953 /* Function vect_check_interleaving.
955 Check if DRA and DRB are a part of interleaving. In case they are, insert
956 DRA and DRB in an interleaving chain. */
959 vect_check_interleaving (struct data_reference *dra,
960 struct data_reference *drb)
962 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
964 /* Check that the data-refs have same first location (except init) and they
965 are both either store or load (not load and store). */
966 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
967 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
968 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
969 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
970 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
971 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
972 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
973 || DR_IS_READ (dra) != DR_IS_READ (drb))
977 1. data-refs are of the same type
978 2. their steps are equal
979 3. the step is greater than the difference between data-refs' inits */
980 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
981 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
983 if (type_size_a != type_size_b
984 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
987 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
988 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
989 step = TREE_INT_CST_LOW (DR_STEP (dra));
993 /* If init_a == init_b + the size of the type * k, we have an interleaving,
994 and DRB is accessed before DRA. */
995 diff_mod_size = (init_a - init_b) % type_size_a;
997 if ((init_a - init_b) > step)
1000 if (diff_mod_size == 0)
1002 vect_update_interleaving_chain (drb, dra);
1003 if (vect_print_dump_info (REPORT_DR_DETAILS))
1005 fprintf (vect_dump, "Detected interleaving ");
1006 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1007 fprintf (vect_dump, " and ");
1008 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1015 /* If init_b == init_a + the size of the type * k, we have an
1016 interleaving, and DRA is accessed before DRB. */
1017 diff_mod_size = (init_b - init_a) % type_size_a;
1019 if ((init_b - init_a) > step)
1022 if (diff_mod_size == 0)
1024 vect_update_interleaving_chain (dra, drb);
1025 if (vect_print_dump_info (REPORT_DR_DETAILS))
1027 fprintf (vect_dump, "Detected interleaving ");
1028 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1029 fprintf (vect_dump, " and ");
1030 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1038 /* Function vect_analyze_data_ref_dependence.
1040 Return TRUE if there (might) exist a dependence between a memory-reference
1041 DRA and a memory-reference DRB. */
1044 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1045 loop_vec_info loop_vinfo)
1048 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1049 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1050 struct data_reference *dra = DDR_A (ddr);
1051 struct data_reference *drb = DDR_B (ddr);
1052 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1053 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1054 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1055 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1056 lambda_vector dist_v;
1057 unsigned int loop_depth;
1059 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1061 /* Independent data accesses. */
1062 vect_check_interleaving (dra, drb);
1066 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1069 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1071 if (vect_print_dump_info (REPORT_DR_DETAILS))
1074 "versioning for alias required: can't determine dependence between ");
1075 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1076 fprintf (vect_dump, " and ");
1077 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1082 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1084 if (vect_print_dump_info (REPORT_DR_DETAILS))
1086 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1087 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1088 fprintf (vect_dump, " and ");
1089 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1094 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1095 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1097 int dist = dist_v[loop_depth];
1099 if (vect_print_dump_info (REPORT_DR_DETAILS))
1100 fprintf (vect_dump, "dependence distance = %d.", dist);
1102 /* Same loop iteration. */
1103 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1105 /* Two references with distance zero have the same alignment. */
1106 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1107 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1108 if (vect_print_dump_info (REPORT_ALIGNMENT))
1109 fprintf (vect_dump, "accesses have the same alignment.");
1110 if (vect_print_dump_info (REPORT_DR_DETAILS))
1112 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1113 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1114 fprintf (vect_dump, " and ");
1115 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1118 /* For interleaving, mark that there is a read-write dependency if
1119 necessary. We check before that one of the data-refs is store. */
1120 if (DR_IS_READ (dra))
1121 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1124 if (DR_IS_READ (drb))
1125 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1131 if (abs (dist) >= vectorization_factor)
1133 /* Dependence distance does not create dependence, as far as vectorization
1134 is concerned, in this case. */
1135 if (vect_print_dump_info (REPORT_DR_DETAILS))
1136 fprintf (vect_dump, "dependence distance >= VF.");
1140 if (vect_print_dump_info (REPORT_DR_DETAILS))
1143 "versioning for alias required: possible dependence "
1144 "between data-refs ");
1145 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1146 fprintf (vect_dump, " and ");
1147 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1156 /* Return TRUE if DDR_NEW is already found in MAY_ALIAS_DDRS list. */
1159 vect_is_duplicate_ddr (VEC (ddr_p, heap) * may_alias_ddrs, ddr_p ddr_new)
1164 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
1166 tree dref_A_i, dref_B_i, dref_A_j, dref_B_j;
1168 dref_A_i = DR_REF (DDR_A (ddr));
1169 dref_B_i = DR_REF (DDR_B (ddr));
1170 dref_A_j = DR_REF (DDR_A (ddr_new));
1171 dref_B_j = DR_REF (DDR_B (ddr_new));
1173 if ((operand_equal_p (dref_A_i, dref_A_j, 0)
1174 && operand_equal_p (dref_B_i, dref_B_j, 0))
1175 || (operand_equal_p (dref_A_i, dref_B_j, 0)
1176 && operand_equal_p (dref_B_i, dref_A_j, 0)))
1178 if (vect_print_dump_info (REPORT_DR_DETAILS))
1180 fprintf (vect_dump, "found same pair of data references ");
1181 print_generic_expr (vect_dump, dref_A_i, TDF_SLIM);
1182 fprintf (vect_dump, " and ");
1183 print_generic_expr (vect_dump, dref_B_i, TDF_SLIM);
1191 /* Save DDR in LOOP_VINFO list of ddrs that may alias and need to be
1192 tested at run-time. Returns false if number of run-time checks
1193 inserted by vectorizer is greater than maximum defined by
1194 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS. */
1196 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1198 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1200 if (vect_print_dump_info (REPORT_DR_DETAILS))
1202 fprintf (vect_dump, "mark for run-time aliasing test between ");
1203 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1204 fprintf (vect_dump, " and ");
1205 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1208 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1211 if (vect_print_dump_info (REPORT_DR_DETAILS))
1212 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1216 /* Do not add to the list duplicate ddrs. */
1217 if (vect_is_duplicate_ddr (LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr))
1220 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo))
1221 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1223 if (vect_print_dump_info (REPORT_DR_DETAILS))
1226 "disable versioning for alias - max number of generated "
1227 "checks exceeded.");
1230 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1234 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1238 /* Function vect_analyze_data_ref_dependences.
1240 Examine all the data references in the loop, and make sure there do not
1241 exist any data dependences between them. */
1244 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1247 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1248 struct data_dependence_relation *ddr;
1250 if (vect_print_dump_info (REPORT_DETAILS))
1251 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1253 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1254 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1256 /* Add to list of ddrs that need to be tested at run-time. */
1257 if (!vect_mark_for_runtime_alias_test (ddr, loop_vinfo))
1265 /* Function vect_compute_data_ref_alignment
1267 Compute the misalignment of the data reference DR.
1270 1. If during the misalignment computation it is found that the data reference
1271 cannot be vectorized then false is returned.
1272 2. DR_MISALIGNMENT (DR) is defined.
1274 FOR NOW: No analysis is actually performed. Misalignment is calculated
1275 only for trivial cases. TODO. */
1278 vect_compute_data_ref_alignment (struct data_reference *dr)
1280 tree stmt = DR_STMT (dr);
1281 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1282 tree ref = DR_REF (dr);
1284 tree base, base_addr;
1287 tree aligned_to, alignment;
1289 if (vect_print_dump_info (REPORT_DETAILS))
1290 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1292 /* Initialize misalignment to unknown. */
1293 SET_DR_MISALIGNMENT (dr, -1);
1295 misalign = DR_INIT (dr);
1296 aligned_to = DR_ALIGNED_TO (dr);
1297 base_addr = DR_BASE_ADDRESS (dr);
1298 base = build_fold_indirect_ref (base_addr);
1299 vectype = STMT_VINFO_VECTYPE (stmt_info);
1300 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1302 if (tree_int_cst_compare (aligned_to, alignment) < 0)
1304 if (vect_print_dump_info (REPORT_DETAILS))
1306 fprintf (vect_dump, "Unknown alignment for access: ");
1307 print_generic_expr (vect_dump, base, TDF_SLIM);
1313 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1315 || (TREE_CODE (base_addr) == SSA_NAME
1316 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1317 TREE_TYPE (base_addr)))),
1319 base_aligned = true;
1321 base_aligned = false;
1325 /* Do not change the alignment of global variables if
1326 flag_section_anchors is enabled. */
1327 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1328 || (TREE_STATIC (base) && flag_section_anchors))
1330 if (vect_print_dump_info (REPORT_DETAILS))
1332 fprintf (vect_dump, "can't force alignment of ref: ");
1333 print_generic_expr (vect_dump, ref, TDF_SLIM);
1338 /* Force the alignment of the decl.
1339 NOTE: This is the only change to the code we make during
1340 the analysis phase, before deciding to vectorize the loop. */
1341 if (vect_print_dump_info (REPORT_DETAILS))
1342 fprintf (vect_dump, "force alignment");
1343 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1344 DECL_USER_ALIGN (base) = 1;
1347 /* At this point we assume that the base is aligned. */
1348 gcc_assert (base_aligned
1349 || (TREE_CODE (base) == VAR_DECL
1350 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1352 /* Modulo alignment. */
1353 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1355 if (!host_integerp (misalign, 1))
1357 /* Negative or overflowed misalignment value. */
1358 if (vect_print_dump_info (REPORT_DETAILS))
1359 fprintf (vect_dump, "unexpected misalign value");
1363 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1365 if (vect_print_dump_info (REPORT_DETAILS))
1367 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1368 print_generic_expr (vect_dump, ref, TDF_SLIM);
1375 /* Function vect_compute_data_refs_alignment
1377 Compute the misalignment of data references in the loop.
1378 Return FALSE if a data reference is found that cannot be vectorized. */
1381 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1383 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1384 struct data_reference *dr;
1387 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1388 if (!vect_compute_data_ref_alignment (dr))
1395 /* Function vect_update_misalignment_for_peel
1397 DR - the data reference whose misalignment is to be adjusted.
1398 DR_PEEL - the data reference whose misalignment is being made
1399 zero in the vector loop by the peel.
1400 NPEEL - the number of iterations in the peel loop if the misalignment
1401 of DR_PEEL is known at compile time. */
1404 vect_update_misalignment_for_peel (struct data_reference *dr,
1405 struct data_reference *dr_peel, int npeel)
1408 VEC(dr_p,heap) *same_align_drs;
1409 struct data_reference *current_dr;
1410 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1411 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1412 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1413 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1415 /* For interleaved data accesses the step in the loop must be multiplied by
1416 the size of the interleaving group. */
1417 if (DR_GROUP_FIRST_DR (stmt_info))
1418 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1419 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1420 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1422 /* It can be assumed that the data refs with the same alignment as dr_peel
1423 are aligned in the vector loop. */
1425 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1426 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1428 if (current_dr != dr)
1430 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1431 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1432 SET_DR_MISALIGNMENT (dr, 0);
1436 if (known_alignment_for_access_p (dr)
1437 && known_alignment_for_access_p (dr_peel))
1439 int misal = DR_MISALIGNMENT (dr);
1440 misal += npeel * dr_size;
1441 misal %= UNITS_PER_SIMD_WORD;
1442 SET_DR_MISALIGNMENT (dr, misal);
1446 if (vect_print_dump_info (REPORT_DETAILS))
1447 fprintf (vect_dump, "Setting misalignment to -1.");
1448 SET_DR_MISALIGNMENT (dr, -1);
1452 /* Function vect_verify_datarefs_alignment
1454 Return TRUE if all data references in the loop can be
1455 handled with respect to alignment. */
1458 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1460 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1461 struct data_reference *dr;
1462 enum dr_alignment_support supportable_dr_alignment;
1465 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1467 tree stmt = DR_STMT (dr);
1468 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1470 /* For interleaving, only the alignment of the first access matters. */
1471 if (DR_GROUP_FIRST_DR (stmt_info)
1472 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1475 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1476 if (!supportable_dr_alignment)
1478 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1480 if (DR_IS_READ (dr))
1482 "not vectorized: unsupported unaligned load.");
1485 "not vectorized: unsupported unaligned store.");
1489 if (supportable_dr_alignment != dr_aligned
1490 && vect_print_dump_info (REPORT_ALIGNMENT))
1491 fprintf (vect_dump, "Vectorizing an unaligned access.");
1497 /* Function vector_alignment_reachable_p
1499 Return true if vector alignment for DR is reachable by peeling
1500 a few loop iterations. Return false otherwise. */
1503 vector_alignment_reachable_p (struct data_reference *dr)
1505 tree stmt = DR_STMT (dr);
1506 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1507 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1509 if (DR_GROUP_FIRST_DR (stmt_info))
1511 /* For interleaved access we peel only if number of iterations in
1512 the prolog loop ({VF - misalignment}), is a multiple of the
1513 number of the interleaved accesses. */
1514 int elem_size, mis_in_elements;
1515 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1517 /* FORNOW: handle only known alignment. */
1518 if (!known_alignment_for_access_p (dr))
1521 elem_size = UNITS_PER_SIMD_WORD / nelements;
1522 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1524 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1528 /* If misalignment is known at the compile time then allow peeling
1529 only if natural alignment is reachable through peeling. */
1530 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1532 HOST_WIDE_INT elmsize =
1533 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1534 if (vect_print_dump_info (REPORT_DETAILS))
1536 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1537 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1539 if (DR_MISALIGNMENT (dr) % elmsize)
1541 if (vect_print_dump_info (REPORT_DETAILS))
1542 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1547 if (!known_alignment_for_access_p (dr))
1549 tree type = (TREE_TYPE (DR_REF (dr)));
1550 tree ba = DR_BASE_OBJECT (dr);
1551 bool is_packed = false;
1554 is_packed = contains_packed_reference (ba);
1556 if (vect_print_dump_info (REPORT_DETAILS))
1557 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1558 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1567 /* Function vect_enhance_data_refs_alignment
1569 This pass will use loop versioning and loop peeling in order to enhance
1570 the alignment of data references in the loop.
1572 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1573 original loop is to be vectorized; Any other loops that are created by
1574 the transformations performed in this pass - are not supposed to be
1575 vectorized. This restriction will be relaxed.
1577 This pass will require a cost model to guide it whether to apply peeling
1578 or versioning or a combination of the two. For example, the scheme that
1579 intel uses when given a loop with several memory accesses, is as follows:
1580 choose one memory access ('p') which alignment you want to force by doing
1581 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1582 other accesses are not necessarily aligned, or (2) use loop versioning to
1583 generate one loop in which all accesses are aligned, and another loop in
1584 which only 'p' is necessarily aligned.
1586 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1587 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1588 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1590 Devising a cost model is the most critical aspect of this work. It will
1591 guide us on which access to peel for, whether to use loop versioning, how
1592 many versions to create, etc. The cost model will probably consist of
1593 generic considerations as well as target specific considerations (on
1594 powerpc for example, misaligned stores are more painful than misaligned
1597 Here are the general steps involved in alignment enhancements:
1599 -- original loop, before alignment analysis:
1600 for (i=0; i<N; i++){
1601 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1602 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1605 -- After vect_compute_data_refs_alignment:
1606 for (i=0; i<N; i++){
1607 x = q[i]; # DR_MISALIGNMENT(q) = 3
1608 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1611 -- Possibility 1: we do loop versioning:
1613 for (i=0; i<N; i++){ # loop 1A
1614 x = q[i]; # DR_MISALIGNMENT(q) = 3
1615 p[i] = y; # DR_MISALIGNMENT(p) = 0
1619 for (i=0; i<N; i++){ # loop 1B
1620 x = q[i]; # DR_MISALIGNMENT(q) = 3
1621 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1625 -- Possibility 2: we do loop peeling:
1626 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1630 for (i = 3; i < N; i++){ # loop 2A
1631 x = q[i]; # DR_MISALIGNMENT(q) = 0
1632 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1635 -- Possibility 3: combination of loop peeling and versioning:
1636 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1641 for (i = 3; i<N; i++){ # loop 3A
1642 x = q[i]; # DR_MISALIGNMENT(q) = 0
1643 p[i] = y; # DR_MISALIGNMENT(p) = 0
1647 for (i = 3; i<N; i++){ # loop 3B
1648 x = q[i]; # DR_MISALIGNMENT(q) = 0
1649 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1653 These loops are later passed to loop_transform to be vectorized. The
1654 vectorizer will use the alignment information to guide the transformation
1655 (whether to generate regular loads/stores, or with special handling for
1659 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1661 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1662 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1663 enum dr_alignment_support supportable_dr_alignment;
1664 struct data_reference *dr0 = NULL;
1665 struct data_reference *dr;
1667 bool do_peeling = false;
1668 bool do_versioning = false;
1671 stmt_vec_info stmt_info;
1672 int vect_versioning_for_alias_required;
1674 if (vect_print_dump_info (REPORT_DETAILS))
1675 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1677 /* While cost model enhancements are expected in the future, the high level
1678 view of the code at this time is as follows:
1680 A) If there is a misaligned write then see if peeling to align this write
1681 can make all data references satisfy vect_supportable_dr_alignment.
1682 If so, update data structures as needed and return true. Note that
1683 at this time vect_supportable_dr_alignment is known to return false
1684 for a misaligned write.
1686 B) If peeling wasn't possible and there is a data reference with an
1687 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1688 then see if loop versioning checks can be used to make all data
1689 references satisfy vect_supportable_dr_alignment. If so, update
1690 data structures as needed and return true.
1692 C) If neither peeling nor versioning were successful then return false if
1693 any data reference does not satisfy vect_supportable_dr_alignment.
1695 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1697 Note, Possibility 3 above (which is peeling and versioning together) is not
1698 being done at this time. */
1700 /* (1) Peeling to force alignment. */
1702 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1704 + How many accesses will become aligned due to the peeling
1705 - How many accesses will become unaligned due to the peeling,
1706 and the cost of misaligned accesses.
1707 - The cost of peeling (the extra runtime checks, the increase
1710 The scheme we use FORNOW: peel to force the alignment of the first
1711 misaligned store in the loop.
1712 Rationale: misaligned stores are not yet supported.
1714 TODO: Use a cost model. */
1716 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1718 stmt = DR_STMT (dr);
1719 stmt_info = vinfo_for_stmt (stmt);
1721 /* For interleaving, only the alignment of the first access
1723 if (DR_GROUP_FIRST_DR (stmt_info)
1724 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1727 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1729 do_peeling = vector_alignment_reachable_p (dr);
1732 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1733 fprintf (vect_dump, "vector alignment may not be reachable");
1738 vect_versioning_for_alias_required =
1739 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1741 /* Temporarily, if versioning for alias is required, we disable peeling
1742 until we support peeling and versioning. Often peeling for alignment
1743 will require peeling for loop-bound, which in turn requires that we
1744 know how to adjust the loop ivs after the loop. */
1745 if (vect_versioning_for_alias_required
1746 || !vect_can_advance_ivs_p (loop_vinfo)
1747 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1754 tree stmt = DR_STMT (dr0);
1755 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1756 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1757 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1759 if (known_alignment_for_access_p (dr0))
1761 /* Since it's known at compile time, compute the number of iterations
1762 in the peeled loop (the peeling factor) for use in updating
1763 DR_MISALIGNMENT values. The peeling factor is the vectorization
1764 factor minus the misalignment as an element count. */
1765 mis = DR_MISALIGNMENT (dr0);
1766 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1767 npeel = nelements - mis;
1769 /* For interleaved data access every iteration accesses all the
1770 members of the group, therefore we divide the number of iterations
1771 by the group size. */
1772 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1773 if (DR_GROUP_FIRST_DR (stmt_info))
1774 npeel /= DR_GROUP_SIZE (stmt_info);
1776 if (vect_print_dump_info (REPORT_DETAILS))
1777 fprintf (vect_dump, "Try peeling by %d", npeel);
1780 /* Ensure that all data refs can be vectorized after the peel. */
1781 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1783 int save_misalignment;
1788 stmt = DR_STMT (dr);
1789 stmt_info = vinfo_for_stmt (stmt);
1790 /* For interleaving, only the alignment of the first access
1792 if (DR_GROUP_FIRST_DR (stmt_info)
1793 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1796 save_misalignment = DR_MISALIGNMENT (dr);
1797 vect_update_misalignment_for_peel (dr, dr0, npeel);
1798 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1799 SET_DR_MISALIGNMENT (dr, save_misalignment);
1801 if (!supportable_dr_alignment)
1810 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1811 If the misalignment of DR_i is identical to that of dr0 then set
1812 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1813 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1814 by the peeling factor times the element size of DR_i (MOD the
1815 vectorization factor times the size). Otherwise, the
1816 misalignment of DR_i must be set to unknown. */
1817 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1819 vect_update_misalignment_for_peel (dr, dr0, npeel);
1821 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1822 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1823 SET_DR_MISALIGNMENT (dr0, 0);
1824 if (vect_print_dump_info (REPORT_ALIGNMENT))
1825 fprintf (vect_dump, "Alignment of access forced using peeling.");
1827 if (vect_print_dump_info (REPORT_DETAILS))
1828 fprintf (vect_dump, "Peeling for alignment will be applied.");
1830 stat = vect_verify_datarefs_alignment (loop_vinfo);
1837 /* (2) Versioning to force alignment. */
1839 /* Try versioning if:
1840 1) flag_tree_vect_loop_version is TRUE
1841 2) optimize_size is FALSE
1842 3) there is at least one unsupported misaligned data ref with an unknown
1844 4) all misaligned data refs with a known misalignment are supported, and
1845 5) the number of runtime alignment checks is within reason. */
1848 flag_tree_vect_loop_version
1850 && (!loop->inner); /* FORNOW */
1854 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1856 stmt = DR_STMT (dr);
1857 stmt_info = vinfo_for_stmt (stmt);
1859 /* For interleaving, only the alignment of the first access
1861 if (aligned_access_p (dr)
1862 || (DR_GROUP_FIRST_DR (stmt_info)
1863 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1866 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1868 if (!supportable_dr_alignment)
1874 if (known_alignment_for_access_p (dr)
1875 || VEC_length (tree,
1876 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1877 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1879 do_versioning = false;
1883 stmt = DR_STMT (dr);
1884 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1885 gcc_assert (vectype);
1887 /* The rightmost bits of an aligned address must be zeros.
1888 Construct the mask needed for this test. For example,
1889 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1890 mask must be 15 = 0xf. */
1891 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1893 /* FORNOW: use the same mask to test all potentially unaligned
1894 references in the loop. The vectorizer currently supports
1895 a single vector size, see the reference to
1896 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1897 vectorization factor is computed. */
1898 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1899 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1900 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1901 VEC_safe_push (tree, heap,
1902 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1907 /* Versioning requires at least one misaligned data reference. */
1908 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1909 do_versioning = false;
1910 else if (!do_versioning)
1911 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1916 VEC(tree,heap) *may_misalign_stmts
1917 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1920 /* It can now be assumed that the data references in the statements
1921 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1922 of the loop being vectorized. */
1923 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1925 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1926 dr = STMT_VINFO_DATA_REF (stmt_info);
1927 SET_DR_MISALIGNMENT (dr, 0);
1928 if (vect_print_dump_info (REPORT_ALIGNMENT))
1929 fprintf (vect_dump, "Alignment of access forced using versioning.");
1932 if (vect_print_dump_info (REPORT_DETAILS))
1933 fprintf (vect_dump, "Versioning for alignment will be applied.");
1935 /* Peeling and versioning can't be done together at this time. */
1936 gcc_assert (! (do_peeling && do_versioning));
1938 stat = vect_verify_datarefs_alignment (loop_vinfo);
1943 /* This point is reached if neither peeling nor versioning is being done. */
1944 gcc_assert (! (do_peeling || do_versioning));
1946 stat = vect_verify_datarefs_alignment (loop_vinfo);
1951 /* Function vect_analyze_data_refs_alignment
1953 Analyze the alignment of the data-references in the loop.
1954 Return FALSE if a data reference is found that cannot be vectorized. */
1957 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1959 if (vect_print_dump_info (REPORT_DETAILS))
1960 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1962 if (!vect_compute_data_refs_alignment (loop_vinfo))
1964 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1966 "not vectorized: can't calculate alignment for data ref.");
1974 /* Function vect_analyze_data_ref_access.
1976 Analyze the access pattern of the data-reference DR. For now, a data access
1977 has to be consecutive to be considered vectorizable. */
1980 vect_analyze_data_ref_access (struct data_reference *dr)
1982 tree step = DR_STEP (dr);
1983 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1984 tree scalar_type = TREE_TYPE (DR_REF (dr));
1985 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1986 tree stmt = DR_STMT (dr);
1987 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1988 interleaving group (including gaps). */
1989 HOST_WIDE_INT stride = dr_step / type_size;
1993 if (vect_print_dump_info (REPORT_DETAILS))
1994 fprintf (vect_dump, "bad data-ref access");
1999 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2001 /* Mark that it is not interleaving. */
2002 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2006 /* Not consecutive access is possible only if it is a part of interleaving. */
2007 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2009 /* Check if it this DR is a part of interleaving, and is a single
2010 element of the group that is accessed in the loop. */
2012 /* Gaps are supported only for loads. STEP must be a multiple of the type
2013 size. The size of the group must be a power of 2. */
2015 && (dr_step % type_size) == 0
2017 && exact_log2 (stride) != -1)
2019 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2020 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2021 if (vect_print_dump_info (REPORT_DR_DETAILS))
2023 fprintf (vect_dump, "Detected single element interleaving %d ",
2024 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2025 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2026 fprintf (vect_dump, " step ");
2027 print_generic_expr (vect_dump, step, TDF_SLIM);
2031 if (vect_print_dump_info (REPORT_DETAILS))
2032 fprintf (vect_dump, "not consecutive access");
2036 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2038 /* First stmt in the interleaving chain. Check the chain. */
2039 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2040 struct data_reference *data_ref = dr;
2041 unsigned int count = 1;
2043 tree prev_init = DR_INIT (data_ref);
2045 HOST_WIDE_INT diff, count_in_bytes;
2049 /* Skip same data-refs. In case that two or more stmts share data-ref
2050 (supported only for loads), we vectorize only the first stmt, and
2051 the rest get their vectorized loads from the first one. */
2052 if (!tree_int_cst_compare (DR_INIT (data_ref),
2053 DR_INIT (STMT_VINFO_DATA_REF (
2054 vinfo_for_stmt (next)))))
2056 if (!DR_IS_READ (data_ref))
2058 if (vect_print_dump_info (REPORT_DETAILS))
2059 fprintf (vect_dump, "Two store stmts share the same dr.");
2063 /* Check that there is no load-store dependencies for this loads
2064 to prevent a case of load-store-load to the same location. */
2065 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2066 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2068 if (vect_print_dump_info (REPORT_DETAILS))
2070 "READ_WRITE dependence in interleaving.");
2074 /* For load use the same data-ref load. */
2075 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2078 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2083 /* Check that all the accesses have the same STEP. */
2084 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2085 if (tree_int_cst_compare (step, next_step))
2087 if (vect_print_dump_info (REPORT_DETAILS))
2088 fprintf (vect_dump, "not consecutive access in interleaving");
2092 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2093 /* Check that the distance between two accesses is equal to the type
2094 size. Otherwise, we have gaps. */
2095 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2096 - TREE_INT_CST_LOW (prev_init)) / type_size;
2097 if (!DR_IS_READ (data_ref) && diff != 1)
2099 if (vect_print_dump_info (REPORT_DETAILS))
2100 fprintf (vect_dump, "interleaved store with gaps");
2103 /* Store the gap from the previous member of the group. If there is no
2104 gap in the access, DR_GROUP_GAP is always 1. */
2105 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2107 prev_init = DR_INIT (data_ref);
2108 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2109 /* Count the number of data-refs in the chain. */
2113 /* COUNT is the number of accesses found, we multiply it by the size of
2114 the type to get COUNT_IN_BYTES. */
2115 count_in_bytes = type_size * count;
2117 /* Check that the size of the interleaving is not greater than STEP. */
2118 if (dr_step < count_in_bytes)
2120 if (vect_print_dump_info (REPORT_DETAILS))
2122 fprintf (vect_dump, "interleaving size is greater than step for ");
2123 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2128 /* Check that the size of the interleaving is equal to STEP for stores,
2129 i.e., that there are no gaps. */
2130 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2132 if (vect_print_dump_info (REPORT_DETAILS))
2133 fprintf (vect_dump, "interleaved store with gaps");
2137 /* Check that STEP is a multiple of type size. */
2138 if ((dr_step % type_size) != 0)
2140 if (vect_print_dump_info (REPORT_DETAILS))
2142 fprintf (vect_dump, "step is not a multiple of type size: step ");
2143 print_generic_expr (vect_dump, step, TDF_SLIM);
2144 fprintf (vect_dump, " size ");
2145 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2151 /* FORNOW: we handle only interleaving that is a power of 2. */
2152 if (exact_log2 (stride) == -1)
2154 if (vect_print_dump_info (REPORT_DETAILS))
2155 fprintf (vect_dump, "interleaving is not a power of 2");
2158 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2164 /* Function vect_analyze_data_ref_accesses.
2166 Analyze the access pattern of all the data references in the loop.
2168 FORNOW: the only access pattern that is considered vectorizable is a
2169 simple step 1 (consecutive) access.
2171 FORNOW: handle only arrays and pointer accesses. */
2174 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2177 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2178 struct data_reference *dr;
2180 if (vect_print_dump_info (REPORT_DETAILS))
2181 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2183 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2184 if (!vect_analyze_data_ref_access (dr))
2186 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2187 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2195 /* Function vect_analyze_data_refs.
2197 Find all the data references in the loop.
2199 The general structure of the analysis of data refs in the vectorizer is as
2201 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
2202 find and analyze all data-refs in the loop and their dependences.
2203 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2204 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2205 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2210 vect_analyze_data_refs (loop_vec_info loop_vinfo)
2212 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2214 VEC (data_reference_p, heap) *datarefs;
2215 struct data_reference *dr;
2218 if (vect_print_dump_info (REPORT_DETAILS))
2219 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2221 compute_data_dependences_for_loop (loop, true,
2222 &LOOP_VINFO_DATAREFS (loop_vinfo),
2223 &LOOP_VINFO_DDRS (loop_vinfo));
2225 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2226 from stmt_vec_info struct to DR and vectype. */
2227 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2229 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2232 stmt_vec_info stmt_info;
2235 if (!dr || !DR_REF (dr))
2237 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2238 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2242 /* Update DR field in stmt_vec_info struct. */
2243 stmt = DR_STMT (dr);
2244 stmt_info = vinfo_for_stmt (stmt);
2246 /* If outer-loop vectorization: we don't yet support datarefs
2247 in the innermost loop. */
2248 bb = bb_for_stmt (stmt);
2249 if (bb->loop_father != LOOP_VINFO_LOOP (loop_vinfo))
2251 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2252 fprintf (vect_dump, "not vectorized: data-ref in nested loop");
2256 if (STMT_VINFO_DATA_REF (stmt_info))
2258 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2261 "not vectorized: more than one data ref in stmt: ");
2262 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2266 STMT_VINFO_DATA_REF (stmt_info) = dr;
2268 /* Check that analysis of the data-ref succeeded. */
2269 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2272 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2274 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2275 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2280 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2282 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2283 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2288 if (!DR_SYMBOL_TAG (dr))
2290 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2292 fprintf (vect_dump, "not vectorized: no memory tag for ");
2293 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2298 /* Set vectype for STMT. */
2299 scalar_type = TREE_TYPE (DR_REF (dr));
2300 STMT_VINFO_VECTYPE (stmt_info) =
2301 get_vectype_for_scalar_type (scalar_type);
2302 if (!STMT_VINFO_VECTYPE (stmt_info))
2304 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2307 "not vectorized: no vectype for stmt: ");
2308 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2309 fprintf (vect_dump, " scalar_type: ");
2310 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2320 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2322 /* Function vect_mark_relevant.
2324 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2327 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2328 enum vect_relevant relevant, bool live_p)
2330 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2331 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2332 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2334 if (vect_print_dump_info (REPORT_DETAILS))
2335 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2337 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2341 /* This is the last stmt in a sequence that was detected as a
2342 pattern that can potentially be vectorized. Don't mark the stmt
2343 as relevant/live because it's not going to be vectorized.
2344 Instead mark the pattern-stmt that replaces it. */
2346 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2348 if (vect_print_dump_info (REPORT_DETAILS))
2349 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2350 stmt_info = vinfo_for_stmt (pattern_stmt);
2351 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2352 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2353 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2354 stmt = pattern_stmt;
2357 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2358 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2359 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2361 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2362 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "already marked relevant/live.");
2369 VEC_safe_push (tree, heap, *worklist, stmt);
2373 /* Function vect_stmt_relevant_p.
2375 Return true if STMT in loop that is represented by LOOP_VINFO is
2376 "relevant for vectorization".
2378 A stmt is considered "relevant for vectorization" if:
2379 - it has uses outside the loop.
2380 - it has vdefs (it alters memory).
2381 - control stmts in the loop (except for the exit condition).
2383 CHECKME: what other side effects would the vectorizer allow? */
2386 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2387 enum vect_relevant *relevant, bool *live_p)
2389 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2390 ssa_op_iter op_iter;
2391 imm_use_iterator imm_iter;
2392 use_operand_p use_p;
2393 def_operand_p def_p;
2395 *relevant = vect_unused_in_loop;
2398 /* cond stmt other than loop exit cond. */
2399 if (is_ctrl_stmt (stmt)
2400 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
2401 *relevant = vect_used_in_loop;
2403 /* changing memory. */
2404 if (TREE_CODE (stmt) != PHI_NODE)
2405 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2407 if (vect_print_dump_info (REPORT_DETAILS))
2408 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2409 *relevant = vect_used_in_loop;
2412 /* uses outside the loop. */
2413 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2415 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2417 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2418 if (!flow_bb_inside_loop_p (loop, bb))
2420 if (vect_print_dump_info (REPORT_DETAILS))
2421 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2423 /* We expect all such uses to be in the loop exit phis
2424 (because of loop closed form) */
2425 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2426 gcc_assert (bb == single_exit (loop)->dest);
2433 return (*live_p || *relevant);
2438 Function process_use.
2441 - a USE in STMT in a loop represented by LOOP_VINFO
2442 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
2443 that defined USE. This is dont by calling mark_relevant and passing it
2444 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
2447 Generally, LIVE_P and RELEVANT are used to define the liveness and
2448 relevance info of the DEF_STMT of this USE:
2449 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2450 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2452 - case 1: If USE is used only for address computations (e.g. array indexing),
2453 which does not need to be directly vectorized, then the liveness/relevance
2454 of the respective DEF_STMT is left unchanged.
2455 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
2456 skip DEF_STMT cause it had already been processed.
2457 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
2458 be modified accordingly.
2460 Return true if everything is as expected. Return false otherwise. */
2463 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
2464 enum vect_relevant relevant, VEC(tree,heap) **worklist)
2466 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2467 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2468 stmt_vec_info dstmt_vinfo;
2469 basic_block bb, def_bb;
2471 enum vect_def_type dt;
2473 /* case 1: we are only interested in uses that need to be vectorized. Uses
2474 that are used for address computation are not considered relevant. */
2475 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2478 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2480 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2481 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2485 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2488 def_bb = bb_for_stmt (def_stmt);
2489 if (!flow_bb_inside_loop_p (loop, def_bb))
2491 if (vect_print_dump_info (REPORT_DETAILS))
2492 fprintf (vect_dump, "def_stmt is out of loop.");
2496 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
2497 DEF_STMT must have already been processed, because this should be the
2498 only way that STMT, which is a reduction-phi, was put in the worklist,
2499 as there should be no other uses for DEF_STMT in the loop. So we just
2500 check that everything is as expected, and we are done. */
2501 dstmt_vinfo = vinfo_for_stmt (def_stmt);
2502 bb = bb_for_stmt (stmt);
2503 if (TREE_CODE (stmt) == PHI_NODE
2504 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2505 && TREE_CODE (def_stmt) != PHI_NODE
2506 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
2507 && bb->loop_father == def_bb->loop_father)
2509 if (vect_print_dump_info (REPORT_DETAILS))
2510 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
2511 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
2512 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
2513 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
2514 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
2515 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
2519 /* case 3a: outer-loop stmt defining an inner-loop stmt:
2520 outer-loop-header-bb:
2526 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
2528 if (vect_print_dump_info (REPORT_DETAILS))
2529 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
2532 case vect_unused_in_loop:
2533 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
2534 vect_used_by_reduction : vect_unused_in_loop;
2536 case vect_used_in_outer_by_reduction:
2537 relevant = vect_used_by_reduction;
2539 case vect_used_in_outer:
2540 relevant = vect_used_in_loop;
2542 case vect_used_by_reduction:
2543 case vect_used_in_loop:
2551 /* case 3b: inner-loop stmt defining an outer-loop stmt:
2552 outer-loop-header-bb:
2558 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
2560 if (vect_print_dump_info (REPORT_DETAILS))
2561 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
2564 case vect_unused_in_loop:
2565 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
2566 vect_used_in_outer_by_reduction : vect_unused_in_loop;
2569 case vect_used_in_outer_by_reduction:
2570 case vect_used_in_outer:
2573 case vect_used_by_reduction:
2574 relevant = vect_used_in_outer_by_reduction;
2577 case vect_used_in_loop:
2578 relevant = vect_used_in_outer;
2586 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
2591 /* Function vect_mark_stmts_to_be_vectorized.
2593 Not all stmts in the loop need to be vectorized. For example:
2602 Stmt 1 and 3 do not need to be vectorized, because loop control and
2603 addressing of vectorized data-refs are handled differently.
2605 This pass detects such stmts. */
2608 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2610 VEC(tree,heap) *worklist;
2611 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2612 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2613 unsigned int nbbs = loop->num_nodes;
2614 block_stmt_iterator si;
2618 stmt_vec_info stmt_vinfo;
2622 enum vect_relevant relevant;
2624 if (vect_print_dump_info (REPORT_DETAILS))
2625 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2627 worklist = VEC_alloc (tree, heap, 64);
2629 /* 1. Init worklist. */
2630 for (i = 0; i < nbbs; i++)
2633 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2635 if (vect_print_dump_info (REPORT_DETAILS))
2637 fprintf (vect_dump, "init: phi relevant? ");
2638 print_generic_expr (vect_dump, phi, TDF_SLIM);
2641 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2642 vect_mark_relevant (&worklist, phi, relevant, live_p);
2644 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2646 stmt = bsi_stmt (si);
2647 if (vect_print_dump_info (REPORT_DETAILS))
2649 fprintf (vect_dump, "init: stmt relevant? ");
2650 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2653 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2654 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2658 /* 2. Process_worklist */
2659 while (VEC_length (tree, worklist) > 0)
2661 use_operand_p use_p;
2664 stmt = VEC_pop (tree, worklist);
2665 if (vect_print_dump_info (REPORT_DETAILS))
2667 fprintf (vect_dump, "worklist: examine stmt: ");
2668 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2671 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
2672 (DEF_STMT) as relevant/irrelevant and live/dead according to the
2673 liveness and relevance properties of STMT. */
2674 ann = stmt_ann (stmt);
2675 stmt_vinfo = vinfo_for_stmt (stmt);
2676 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2677 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2679 /* Generally, the liveness and relevance properties of STMT are
2680 propagated as is to the DEF_STMTs of its USEs:
2681 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
2682 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
2684 One exception is when STMT has been identified as defining a reduction
2685 variable; in this case we set the liveness/relevance as follows:
2687 relevant = vect_used_by_reduction
2688 This is because we distinguish between two kinds of relevant stmts -
2689 those that are used by a reduction computation, and those that are
2690 (also) used by a regular computation. This allows us later on to
2691 identify stmts that are used solely by a reduction, and therefore the
2692 order of the results that they produce does not have to be kept.
2694 Reduction phis are expected to be used by a reduction stmt, or by
2695 in an outer loop; Other reduction stmts are expected to be
2696 in the loop, and possibly used by a stmt in an outer loop.
2697 Here are the expected values of "relevant" for reduction phis/stmts:
2700 vect_unused_in_loop ok
2701 vect_used_in_outer_by_reduction ok ok
2702 vect_used_in_outer ok ok
2703 vect_used_by_reduction ok
2704 vect_used_in_loop */
2706 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2708 enum vect_relevant tmp_relevant = relevant;
2709 switch (tmp_relevant)
2711 case vect_unused_in_loop:
2712 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2713 relevant = vect_used_by_reduction;
2716 case vect_used_in_outer_by_reduction:
2717 case vect_used_in_outer:
2718 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
2719 && TREE_CODE (stmt) != DOT_PROD_EXPR);
2722 case vect_used_by_reduction:
2723 if (TREE_CODE (stmt) == PHI_NODE)
2726 case vect_used_in_loop:
2728 if (vect_print_dump_info (REPORT_DETAILS))
2729 fprintf (vect_dump, "unsupported use of reduction.");
2730 VEC_free (tree, heap, worklist);
2736 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2738 tree op = USE_FROM_PTR (use_p);
2739 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
2741 VEC_free (tree, heap, worklist);
2745 } /* while worklist */
2747 VEC_free (tree, heap, worklist);
2752 /* Function vect_can_advance_ivs_p
2754 In case the number of iterations that LOOP iterates is unknown at compile
2755 time, an epilog loop will be generated, and the loop induction variables
2756 (IVs) will be "advanced" to the value they are supposed to take just before
2757 the epilog loop. Here we check that the access function of the loop IVs
2758 and the expression that represents the loop bound are simple enough.
2759 These restrictions will be relaxed in the future. */
2762 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2764 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2765 basic_block bb = loop->header;
2768 /* Analyze phi functions of the loop header. */
2770 if (vect_print_dump_info (REPORT_DETAILS))
2771 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2773 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2775 tree access_fn = NULL;
2776 tree evolution_part;
2778 if (vect_print_dump_info (REPORT_DETAILS))
2780 fprintf (vect_dump, "Analyze phi: ");
2781 print_generic_expr (vect_dump, phi, TDF_SLIM);
2784 /* Skip virtual phi's. The data dependences that are associated with
2785 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2787 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2789 if (vect_print_dump_info (REPORT_DETAILS))
2790 fprintf (vect_dump, "virtual phi. skip.");
2794 /* Skip reduction phis. */
2796 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2798 if (vect_print_dump_info (REPORT_DETAILS))
2799 fprintf (vect_dump, "reduc phi. skip.");
2803 /* Analyze the evolution function. */
2805 access_fn = instantiate_parameters
2806 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2810 if (vect_print_dump_info (REPORT_DETAILS))
2811 fprintf (vect_dump, "No Access function.");
2815 if (vect_print_dump_info (REPORT_DETAILS))
2817 fprintf (vect_dump, "Access function of PHI: ");
2818 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2821 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2823 if (evolution_part == NULL_TREE)
2825 if (vect_print_dump_info (REPORT_DETAILS))
2826 fprintf (vect_dump, "No evolution.");
2830 /* FORNOW: We do not transform initial conditions of IVs
2831 which evolution functions are a polynomial of degree >= 2. */
2833 if (tree_is_chrec (evolution_part))
2841 /* Function vect_get_loop_niters.
2843 Determine how many iterations the loop is executed.
2844 If an expression that represents the number of iterations
2845 can be constructed, place it in NUMBER_OF_ITERATIONS.
2846 Return the loop exit condition. */
2849 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2853 if (vect_print_dump_info (REPORT_DETAILS))
2854 fprintf (vect_dump, "=== get_loop_niters ===");
2856 niters = number_of_exit_cond_executions (loop);
2858 if (niters != NULL_TREE
2859 && niters != chrec_dont_know)
2861 *number_of_iterations = niters;
2863 if (vect_print_dump_info (REPORT_DETAILS))
2865 fprintf (vect_dump, "==> get_loop_niters:" );
2866 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2870 return get_loop_exit_condition (loop);
2874 /* Function vect_analyze_loop_1.
2876 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2877 for it. The different analyses will record information in the
2878 loop_vec_info struct. This is a subset of the analyses applied in
2879 vect_analyze_loop, to be applied on an inner-loop nested in the loop
2880 that is now considered for (outer-loop) vectorization. */
2882 static loop_vec_info
2883 vect_analyze_loop_1 (struct loop *loop)
2885 loop_vec_info loop_vinfo;
2887 if (vect_print_dump_info (REPORT_DETAILS))
2888 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
2890 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2892 loop_vinfo = vect_analyze_loop_form (loop);
2895 if (vect_print_dump_info (REPORT_DETAILS))
2896 fprintf (vect_dump, "bad inner-loop form.");
2904 /* Function vect_analyze_loop_form.
2906 Verify that certain CFG restrictions hold, including:
2907 - the loop has a pre-header
2908 - the loop has a single entry and exit
2909 - the loop exit condition is simple enough, and the number of iterations
2910 can be analyzed (a countable loop). */
2912 static loop_vec_info
2913 vect_analyze_loop_form (struct loop *loop)
2915 loop_vec_info loop_vinfo;
2917 tree number_of_iterations = NULL;
2918 loop_vec_info inner_loop_vinfo = NULL;
2920 if (vect_print_dump_info (REPORT_DETAILS))
2921 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2923 /* Different restrictions apply when we are considering an inner-most loop,
2924 vs. an outer (nested) loop.
2925 (FORNOW. May want to relax some of these restrictions in the future). */
2929 /* Inner-most loop. We currently require that the number of BBs is
2930 exactly 2 (the header and latch). Vectorizable inner-most loops
2941 if (loop->num_nodes != 2)
2943 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2944 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2948 if (empty_block_p (loop->header))
2950 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2951 fprintf (vect_dump, "not vectorized: empty loop.");
2957 struct loop *innerloop = loop->inner;
2958 edge backedge, entryedge;
2960 /* Nested loop. We currently require that the loop is doubly-nested,
2961 contains a single inner loop, and the number of BBs is exactly 5.
2962 Vectorizable outer-loops look like this:
2974 The inner-loop has the properties expected of inner-most loops
2975 as described above. */
2977 if ((loop->inner)->inner || (loop->inner)->next)
2979 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2980 fprintf (vect_dump, "not vectorized: multiple nested loops.");
2984 /* Analyze the inner-loop. */
2985 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
2986 if (!inner_loop_vinfo)
2988 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2989 fprintf (vect_dump, "not vectorized: Bad inner loop.");
2993 if (!expr_invariant_in_loop_p (loop,
2994 LOOP_VINFO_NITERS (inner_loop_vinfo)))
2996 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2998 "not vectorized: inner-loop count not invariant.");
2999 destroy_loop_vec_info (inner_loop_vinfo, true);
3003 if (loop->num_nodes != 5)
3005 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3006 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
3007 destroy_loop_vec_info (inner_loop_vinfo, true);
3011 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
3012 backedge = EDGE_PRED (innerloop->header, 1);
3013 entryedge = EDGE_PRED (innerloop->header, 0);
3014 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
3016 backedge = EDGE_PRED (innerloop->header, 0);
3017 entryedge = EDGE_PRED (innerloop->header, 1);
3020 if (entryedge->src != loop->header
3021 || !single_exit (innerloop)
3022 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
3024 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3025 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
3026 destroy_loop_vec_info (inner_loop_vinfo, true);
3030 if (vect_print_dump_info (REPORT_DETAILS))
3031 fprintf (vect_dump, "Considering outer-loop vectorization.");
3034 if (!single_exit (loop)
3035 || EDGE_COUNT (loop->header->preds) != 2)
3037 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3039 if (!single_exit (loop))
3040 fprintf (vect_dump, "not vectorized: multiple exits.");
3041 else if (EDGE_COUNT (loop->header->preds) != 2)
3042 fprintf (vect_dump, "not vectorized: too many incoming edges.");
3044 if (inner_loop_vinfo)
3045 destroy_loop_vec_info (inner_loop_vinfo, true);
3049 /* We assume that the loop exit condition is at the end of the loop. i.e,
3050 that the loop is represented as a do-while (with a proper if-guard
3051 before the loop if needed), where the loop header contains all the
3052 executable statements, and the latch is empty. */
3053 if (!empty_block_p (loop->latch)
3054 || phi_nodes (loop->latch))
3056 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3057 fprintf (vect_dump, "not vectorized: unexpected loop form.");
3058 if (inner_loop_vinfo)
3059 destroy_loop_vec_info (inner_loop_vinfo, true);
3063 /* Make sure there exists a single-predecessor exit bb: */
3064 if (!single_pred_p (single_exit (loop)->dest))
3066 edge e = single_exit (loop);
3067 if (!(e->flags & EDGE_ABNORMAL))
3069 split_loop_exit_edge (e);
3070 if (vect_print_dump_info (REPORT_DETAILS))
3071 fprintf (vect_dump, "split exit edge.");
3075 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3076 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
3077 if (inner_loop_vinfo)
3078 destroy_loop_vec_info (inner_loop_vinfo, true);
3083 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3086 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3087 fprintf (vect_dump, "not vectorized: complicated exit condition.");
3088 if (inner_loop_vinfo)
3089 destroy_loop_vec_info (inner_loop_vinfo, true);
3093 if (!number_of_iterations)
3095 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3097 "not vectorized: number of iterations cannot be computed.");
3098 if (inner_loop_vinfo)
3099 destroy_loop_vec_info (inner_loop_vinfo, true);
3103 if (chrec_contains_undetermined (number_of_iterations))
3105 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
3106 fprintf (vect_dump, "Infinite number of iterations.");
3107 if (inner_loop_vinfo)
3108 destroy_loop_vec_info (inner_loop_vinfo, true);
3112 if (!NITERS_KNOWN_P (number_of_iterations))
3114 if (vect_print_dump_info (REPORT_DETAILS))
3116 fprintf (vect_dump, "Symbolic number of iterations is ");
3117 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
3120 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
3122 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3123 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
3124 if (inner_loop_vinfo)
3125 destroy_loop_vec_info (inner_loop_vinfo, false);
3129 loop_vinfo = new_loop_vec_info (loop);
3130 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3132 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
3134 /* CHECKME: May want to keep it around it in the future. */
3135 if (inner_loop_vinfo)
3136 destroy_loop_vec_info (inner_loop_vinfo, false);
3138 gcc_assert (!loop->aux);
3139 loop->aux = loop_vinfo;
3144 /* Function vect_analyze_loop.
3146 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3147 for it. The different analyses will record information in the
3148 loop_vec_info struct. */
3150 vect_analyze_loop (struct loop *loop)
3153 loop_vec_info loop_vinfo;
3155 if (vect_print_dump_info (REPORT_DETAILS))
3156 fprintf (vect_dump, "===== analyze_loop_nest =====");
3158 if (loop_outer (loop)
3159 && loop_vec_info_for_loop (loop_outer (loop))
3160 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
3162 if (vect_print_dump_info (REPORT_DETAILS))
3163 fprintf (vect_dump, "outer-loop already vectorized.");
3167 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3169 loop_vinfo = vect_analyze_loop_form (loop);
3172 if (vect_print_dump_info (REPORT_DETAILS))
3173 fprintf (vect_dump, "bad loop form.");
3177 /* Find all data references in the loop (which correspond to vdefs/vuses)
3178 and analyze their evolution in the loop.
3180 FORNOW: Handle only simple, array references, which
3181 alignment can be forced, and aligned pointer-references. */
3183 ok = vect_analyze_data_refs (loop_vinfo);
3186 if (vect_print_dump_info (REPORT_DETAILS))
3187 fprintf (vect_dump, "bad data references.");
3188 destroy_loop_vec_info (loop_vinfo, true);
3192 /* Classify all cross-iteration scalar data-flow cycles.
3193 Cross-iteration cycles caused by virtual phis are analyzed separately. */
3195 vect_analyze_scalar_cycles (loop_vinfo);
3197 vect_pattern_recog (loop_vinfo);
3199 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3201 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
3204 if (vect_print_dump_info (REPORT_DETAILS))
3205 fprintf (vect_dump, "unexpected pattern.");
3206 destroy_loop_vec_info (loop_vinfo, true);
3210 /* Analyze the alignment of the data-refs in the loop.
3211 Fail if a data reference is found that cannot be vectorized. */
3213 ok = vect_analyze_data_refs_alignment (loop_vinfo);
3216 if (vect_print_dump_info (REPORT_DETAILS))
3217 fprintf (vect_dump, "bad data alignment.");
3218 destroy_loop_vec_info (loop_vinfo, true);
3222 ok = vect_determine_vectorization_factor (loop_vinfo);
3225 if (vect_print_dump_info (REPORT_DETAILS))
3226 fprintf (vect_dump, "can't determine vectorization factor.");
3227 destroy_loop_vec_info (loop_vinfo, true);
3231 /* Analyze data dependences between the data-refs in the loop.
3232 FORNOW: fail at the first data dependence that we encounter. */
3234 ok = vect_analyze_data_ref_dependences (loop_vinfo);
3237 if (vect_print_dump_info (REPORT_DETAILS))
3238 fprintf (vect_dump, "bad data dependence.");
3239 destroy_loop_vec_info (loop_vinfo, true);
3243 /* Analyze the access patterns of the data-refs in the loop (consecutive,
3244 complex, etc.). FORNOW: Only handle consecutive access pattern. */
3246 ok = vect_analyze_data_ref_accesses (loop_vinfo);
3249 if (vect_print_dump_info (REPORT_DETAILS))
3250 fprintf (vect_dump, "bad data access.");
3251 destroy_loop_vec_info (loop_vinfo, true);
3255 /* This pass will decide on using loop versioning and/or loop peeling in
3256 order to enhance the alignment of data references in the loop. */
3258 ok = vect_enhance_data_refs_alignment (loop_vinfo);
3261 if (vect_print_dump_info (REPORT_DETAILS))
3262 fprintf (vect_dump, "bad data alignment.");
3263 destroy_loop_vec_info (loop_vinfo, true);
3267 /* Scan all the operations in the loop and make sure they are
3270 ok = vect_analyze_operations (loop_vinfo);
3273 if (vect_print_dump_info (REPORT_DETAILS))
3274 fprintf (vect_dump, "bad operation or unsupported loop bound.");
3275 destroy_loop_vec_info (loop_vinfo, true);
3279 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;