1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
44 /* Main analysis functions. */
45 static loop_vec_info vect_analyze_loop_form (struct loop *);
46 static bool vect_analyze_data_refs (loop_vec_info);
47 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
48 static void vect_analyze_scalar_cycles (loop_vec_info);
49 static bool vect_analyze_data_ref_accesses (loop_vec_info);
50 static bool vect_analyze_data_ref_dependences (loop_vec_info);
51 static bool vect_analyze_data_refs_alignment (loop_vec_info);
52 static bool vect_compute_data_refs_alignment (loop_vec_info);
53 static bool vect_enhance_data_refs_alignment (loop_vec_info);
54 static bool vect_analyze_operations (loop_vec_info);
55 static bool vect_determine_vectorization_factor (loop_vec_info);
57 /* Utility functions for the analyses. */
58 static bool exist_non_indexing_operands_for_use_p (tree, tree);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_dependence_relation *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static bool vect_can_advance_ivs_p (loop_vec_info);
65 static void vect_update_misalignment_for_peel
66 (struct data_reference *, struct data_reference *, int npeel);
68 /* Function vect_determine_vectorization_factor
70 Determine the vectorization factor (VF). VF is the number of data elements
71 that are operated upon in parallel in a single iteration of the vectorized
72 loop. For example, when vectorizing a loop that operates on 4byte elements,
73 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
74 elements can fit in a single vector register.
76 We currently support vectorization of loops in which all types operated upon
77 are of the same size. Therefore this function currently sets VF according to
78 the size of the types operated upon, and fails if there are multiple sizes
81 VF is also the factor by which the loop iterations are strip-mined, e.g.:
88 for (i=0; i<N; i+=VF){
89 a[i:VF] = b[i:VF] + c[i:VF];
94 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
96 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
97 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
98 int nbbs = loop->num_nodes;
99 block_stmt_iterator si;
100 unsigned int vectorization_factor = 0;
105 stmt_vec_info stmt_info;
108 if (vect_print_dump_info (REPORT_DETAILS))
109 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
111 for (i = 0; i < nbbs; i++)
113 basic_block bb = bbs[i];
115 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
117 stmt_info = vinfo_for_stmt (phi);
118 if (vect_print_dump_info (REPORT_DETAILS))
120 fprintf (vect_dump, "==> examining phi: ");
121 print_generic_expr (vect_dump, phi, TDF_SLIM);
124 gcc_assert (stmt_info);
126 if (STMT_VINFO_RELEVANT_P (stmt_info))
128 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
129 scalar_type = TREE_TYPE (PHI_RESULT (phi));
131 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "get vectype for scalar type: ");
134 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
137 vectype = get_vectype_for_scalar_type (scalar_type);
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
143 "not vectorized: unsupported data-type ");
144 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
148 STMT_VINFO_VECTYPE (stmt_info) = vectype;
150 if (vect_print_dump_info (REPORT_DETAILS))
152 fprintf (vect_dump, "vectype: ");
153 print_generic_expr (vect_dump, vectype, TDF_SLIM);
156 nunits = TYPE_VECTOR_SUBPARTS (vectype);
157 if (vect_print_dump_info (REPORT_DETAILS))
158 fprintf (vect_dump, "nunits = %d", nunits);
160 if (!vectorization_factor
161 || (nunits > vectorization_factor))
162 vectorization_factor = nunits;
166 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
168 tree stmt = bsi_stmt (si);
169 stmt_info = vinfo_for_stmt (stmt);
171 if (vect_print_dump_info (REPORT_DETAILS))
173 fprintf (vect_dump, "==> examining statement: ");
174 print_generic_expr (vect_dump, stmt, TDF_SLIM);
177 gcc_assert (stmt_info);
179 /* skip stmts which do not need to be vectorized. */
180 if (!STMT_VINFO_RELEVANT_P (stmt_info)
181 && !STMT_VINFO_LIVE_P (stmt_info))
183 if (vect_print_dump_info (REPORT_DETAILS))
184 fprintf (vect_dump, "skip.");
188 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
190 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
192 fprintf (vect_dump, "not vectorized: irregular stmt.");
193 print_generic_expr (vect_dump, stmt, TDF_SLIM);
198 if (!GIMPLE_STMT_P (stmt)
199 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
201 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
203 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
204 print_generic_expr (vect_dump, stmt, TDF_SLIM);
209 if (STMT_VINFO_VECTYPE (stmt_info))
211 /* The only case when a vectype had been already set is for stmts
212 that contain a dataref, or for "pattern-stmts" (stmts generated
213 by the vectorizer to represent/replace a certain idiom). */
214 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
215 || is_pattern_stmt_p (stmt_info));
216 vectype = STMT_VINFO_VECTYPE (stmt_info);
222 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
223 && !is_pattern_stmt_p (stmt_info));
225 /* We generally set the vectype according to the type of the
227 For stmts whose result-type is different than the type of the
228 arguments (e.g. demotion, promotion), vectype will be reset
229 appropriately (later). Note that we have to visit the smallest
230 datatype in this function, because that determines the VF.
231 If the smallest datatype in the loop is present only as the
232 rhs of a promotion operation - we'd miss it here.
233 Such a case, where a variable of this datatype does not appear
234 in the lhs anywhere in the loop, can only occur if it's an
235 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
236 to have been optimized away by invariant motion. However, we
237 cannot rely on invariant motion to always take invariants out
238 of the loop, and so in the case of promotion we also have to
240 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
242 operation = GIMPLE_STMT_OPERAND (stmt, 1);
243 if (TREE_CODE (operation) == NOP_EXPR
244 || TREE_CODE (operation) == CONVERT_EXPR
245 || TREE_CODE (operation) == WIDEN_MULT_EXPR
246 || TREE_CODE (operation) == FLOAT_EXPR)
248 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
249 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
250 TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
251 scalar_type = rhs_type;
254 if (vect_print_dump_info (REPORT_DETAILS))
256 fprintf (vect_dump, "get vectype for scalar type: ");
257 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
260 vectype = get_vectype_for_scalar_type (scalar_type);
263 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
266 "not vectorized: unsupported data-type ");
267 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
271 STMT_VINFO_VECTYPE (stmt_info) = vectype;
274 if (vect_print_dump_info (REPORT_DETAILS))
276 fprintf (vect_dump, "vectype: ");
277 print_generic_expr (vect_dump, vectype, TDF_SLIM);
280 nunits = TYPE_VECTOR_SUBPARTS (vectype);
281 if (vect_print_dump_info (REPORT_DETAILS))
282 fprintf (vect_dump, "nunits = %d", nunits);
284 if (!vectorization_factor
285 || (nunits > vectorization_factor))
286 vectorization_factor = nunits;
291 /* TODO: Analyze cost. Decide if worth while to vectorize. */
292 if (vect_print_dump_info (REPORT_DETAILS))
293 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
294 if (vectorization_factor <= 1)
296 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
297 fprintf (vect_dump, "not vectorized: unsupported data-type");
300 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
306 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
307 the number of created vector stmts depends on the unrolling factor). However,
308 the actual number of vector stmts for every SLP node depends on VF which is
309 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
310 In this function we assume that the inside costs calculated in
311 vect_model_xxx_cost are linear in ncopies. */
314 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
316 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
317 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
318 slp_instance instance;
320 if (vect_print_dump_info (REPORT_SLP))
321 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
323 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
324 /* We assume that costs are linear in ncopies. */
325 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
326 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
330 /* Function vect_analyze_operations.
332 Scan the loop stmts and make sure they are all vectorizable. */
335 vect_analyze_operations (loop_vec_info loop_vinfo)
337 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
338 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
339 int nbbs = loop->num_nodes;
340 block_stmt_iterator si;
341 unsigned int vectorization_factor = 0;
345 stmt_vec_info stmt_info;
346 bool need_to_vectorize = false;
347 int min_profitable_iters;
348 int min_scalar_loop_bound;
350 bool only_slp_in_loop = true;
352 if (vect_print_dump_info (REPORT_DETAILS))
353 fprintf (vect_dump, "=== vect_analyze_operations ===");
355 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
356 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
358 for (i = 0; i < nbbs; i++)
360 basic_block bb = bbs[i];
362 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
366 stmt_info = vinfo_for_stmt (phi);
367 if (vect_print_dump_info (REPORT_DETAILS))
369 fprintf (vect_dump, "examining phi: ");
370 print_generic_expr (vect_dump, phi, TDF_SLIM);
373 if (! is_loop_header_bb_p (bb))
375 /* inner-loop loop-closed exit phi in outer-loop vectorization
376 (i.e. a phi in the tail of the outer-loop).
377 FORNOW: we currently don't support the case that these phis
378 are not used in the outerloop, cause this case requires
379 to actually do something here. */
380 if (!STMT_VINFO_RELEVANT_P (stmt_info)
381 || STMT_VINFO_LIVE_P (stmt_info))
383 if (vect_print_dump_info (REPORT_DETAILS))
385 "Unsupported loop-closed phi in outer-loop.");
391 gcc_assert (stmt_info);
393 if (STMT_VINFO_LIVE_P (stmt_info))
395 /* FORNOW: not yet supported. */
396 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
397 fprintf (vect_dump, "not vectorized: value used after loop.");
401 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
402 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
404 /* A scalar-dependence cycle that we don't support. */
405 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
406 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
410 if (STMT_VINFO_RELEVANT_P (stmt_info))
412 need_to_vectorize = true;
413 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
414 ok = vectorizable_induction (phi, NULL, NULL);
419 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
422 "not vectorized: relevant phi not supported: ");
423 print_generic_expr (vect_dump, phi, TDF_SLIM);
429 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
431 tree stmt = bsi_stmt (si);
432 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
433 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
435 if (vect_print_dump_info (REPORT_DETAILS))
437 fprintf (vect_dump, "==> examining statement: ");
438 print_generic_expr (vect_dump, stmt, TDF_SLIM);
441 gcc_assert (stmt_info);
443 /* skip stmts which do not need to be vectorized.
444 this is expected to include:
445 - the COND_EXPR which is the loop exit condition
446 - any LABEL_EXPRs in the loop
447 - computations that are used only for array indexing or loop
450 if (!STMT_VINFO_RELEVANT_P (stmt_info)
451 && !STMT_VINFO_LIVE_P (stmt_info))
453 if (vect_print_dump_info (REPORT_DETAILS))
454 fprintf (vect_dump, "irrelevant.");
458 switch (STMT_VINFO_DEF_TYPE (stmt_info))
463 case vect_reduction_def:
464 gcc_assert (relevance == vect_used_in_outer
465 || relevance == vect_used_in_outer_by_reduction
466 || relevance == vect_unused_in_loop);
469 case vect_induction_def:
470 case vect_constant_def:
471 case vect_invariant_def:
472 case vect_unknown_def_type:
477 if (STMT_VINFO_RELEVANT_P (stmt_info))
479 gcc_assert (GIMPLE_STMT_P (stmt)
480 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
481 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
482 need_to_vectorize = true;
486 if (STMT_VINFO_RELEVANT_P (stmt_info)
487 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
488 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
489 || vectorizable_type_demotion (stmt, NULL, NULL)
490 || vectorizable_conversion (stmt, NULL, NULL, NULL)
491 || vectorizable_operation (stmt, NULL, NULL, NULL)
492 || vectorizable_assignment (stmt, NULL, NULL, NULL)
493 || vectorizable_load (stmt, NULL, NULL, NULL)
494 || vectorizable_call (stmt, NULL, NULL)
495 || vectorizable_store (stmt, NULL, NULL, NULL)
496 || vectorizable_condition (stmt, NULL, NULL)
497 || vectorizable_reduction (stmt, NULL, NULL));
501 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
503 fprintf (vect_dump, "not vectorized: relevant stmt not ");
504 fprintf (vect_dump, "supported: ");
505 print_generic_expr (vect_dump, stmt, TDF_SLIM);
510 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
511 need extra handling, except for vectorizable reductions. */
512 if (STMT_VINFO_LIVE_P (stmt_info)
513 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
514 ok = vectorizable_live_operation (stmt, NULL, NULL);
518 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
520 fprintf (vect_dump, "not vectorized: live stmt not ");
521 fprintf (vect_dump, "supported: ");
522 print_generic_expr (vect_dump, stmt, TDF_SLIM);
527 if (!PURE_SLP_STMT (stmt_info))
529 /* STMT needs loop-based vectorization. */
530 only_slp_in_loop = false;
532 /* Groups of strided accesses whose size is not a power of 2 are
533 not vectorizable yet using loop-vectorization. Therefore, if
534 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
535 both SLPed and loop-based vectorzed), the loop cannot be
537 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
538 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
539 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
541 if (vect_print_dump_info (REPORT_DETAILS))
543 fprintf (vect_dump, "not vectorized: the size of group "
544 "of strided accesses is not a power of 2");
545 print_generic_expr (vect_dump, stmt, TDF_SLIM);
553 /* All operations in the loop are either irrelevant (deal with loop
554 control, or dead), or only used outside the loop and can be moved
555 out of the loop (e.g. invariants, inductions). The loop can be
556 optimized away by scalar optimizations. We're better off not
557 touching this loop. */
558 if (!need_to_vectorize)
560 if (vect_print_dump_info (REPORT_DETAILS))
562 "All the computation can be taken out of the loop.");
563 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
565 "not vectorized: redundant loop. no profit to vectorize.");
569 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
570 vectorization factor of the loop is the unrolling factor required by the
571 SLP instances. If that unrolling factor is 1, we say, that we perform
572 pure SLP on loop - cross iteration parallelism is not exploited. */
573 if (only_slp_in_loop)
574 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
576 vectorization_factor = least_common_multiple (vectorization_factor,
577 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
579 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
581 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
582 && vect_print_dump_info (REPORT_DETAILS))
584 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
585 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
587 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
588 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
590 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
591 fprintf (vect_dump, "not vectorized: iteration count too small.");
592 if (vect_print_dump_info (REPORT_DETAILS))
593 fprintf (vect_dump,"not vectorized: iteration count smaller than "
594 "vectorization factor.");
598 /* Analyze cost. Decide if worth while to vectorize. */
600 /* Once VF is set, SLP costs should be updated since the number of created
601 vector stmts depends on VF. */
602 vect_update_slp_costs_according_to_vf (loop_vinfo);
604 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
605 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
606 if (min_profitable_iters < 0)
608 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
609 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
610 if (vect_print_dump_info (REPORT_DETAILS))
611 fprintf (vect_dump, "not vectorized: vector version will never be "
616 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
617 * vectorization_factor) - 1);
619 /* Use the cost model only if it is more conservative than user specified
622 th = (unsigned) min_scalar_loop_bound;
623 if (min_profitable_iters
624 && (!min_scalar_loop_bound
625 || min_profitable_iters > min_scalar_loop_bound))
626 th = (unsigned) min_profitable_iters;
628 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
629 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
631 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
632 fprintf (vect_dump, "not vectorized: vectorization not "
634 if (vect_print_dump_info (REPORT_DETAILS))
635 fprintf (vect_dump, "not vectorized: iteration count smaller than "
636 "user specified loop bound parameter or minimum "
637 "profitable iterations (whichever is more conservative).");
641 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
642 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
643 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
645 if (vect_print_dump_info (REPORT_DETAILS))
646 fprintf (vect_dump, "epilog loop required.");
647 if (!vect_can_advance_ivs_p (loop_vinfo))
649 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
651 "not vectorized: can't create epilog loop 1.");
654 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
656 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
658 "not vectorized: can't create epilog loop 2.");
667 /* Function exist_non_indexing_operands_for_use_p
669 USE is one of the uses attached to STMT. Check if USE is
670 used in STMT for anything other than indexing an array. */
673 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
676 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
678 /* USE corresponds to some operand in STMT. If there is no data
679 reference in STMT, then any operand that corresponds to USE
680 is not indexing an array. */
681 if (!STMT_VINFO_DATA_REF (stmt_info))
684 /* STMT has a data_ref. FORNOW this means that its of one of
688 (This should have been verified in analyze_data_refs).
690 'var' in the second case corresponds to a def, not a use,
691 so USE cannot correspond to any operands that are not used
694 Therefore, all we need to check is if STMT falls into the
695 first case, and whether var corresponds to USE. */
697 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
700 operand = GIMPLE_STMT_OPERAND (stmt, 1);
702 if (TREE_CODE (operand) != SSA_NAME)
712 /* Function vect_analyze_scalar_cycles_1.
714 Examine the cross iteration def-use cycles of scalar variables
715 in LOOP. LOOP_VINFO represents the loop that is noe being
716 considered for vectorization (can be LOOP, or an outer-loop
720 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
723 basic_block bb = loop->header;
725 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
727 if (vect_print_dump_info (REPORT_DETAILS))
728 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
730 /* First - identify all inductions. */
731 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
733 tree access_fn = NULL;
734 tree def = PHI_RESULT (phi);
735 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
737 if (vect_print_dump_info (REPORT_DETAILS))
739 fprintf (vect_dump, "Analyze phi: ");
740 print_generic_expr (vect_dump, phi, TDF_SLIM);
743 /* Skip virtual phi's. The data dependences that are associated with
744 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
745 if (!is_gimple_reg (SSA_NAME_VAR (def)))
748 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
750 /* Analyze the evolution function. */
751 access_fn = analyze_scalar_evolution (loop, def);
752 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
754 fprintf (vect_dump, "Access function of PHI: ");
755 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
759 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
761 VEC_safe_push (tree, heap, worklist, phi);
765 if (vect_print_dump_info (REPORT_DETAILS))
766 fprintf (vect_dump, "Detected induction.");
767 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
771 /* Second - identify all reductions. */
772 while (VEC_length (tree, worklist) > 0)
774 tree phi = VEC_pop (tree, worklist);
775 tree def = PHI_RESULT (phi);
776 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
779 if (vect_print_dump_info (REPORT_DETAILS))
781 fprintf (vect_dump, "Analyze phi: ");
782 print_generic_expr (vect_dump, phi, TDF_SLIM);
785 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
786 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
788 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
791 if (vect_print_dump_info (REPORT_DETAILS))
792 fprintf (vect_dump, "Detected reduction.");
793 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
794 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
798 if (vect_print_dump_info (REPORT_DETAILS))
799 fprintf (vect_dump, "Unknown def-use cycle pattern.");
802 VEC_free (tree, heap, worklist);
807 /* Function vect_analyze_scalar_cycles.
809 Examine the cross iteration def-use cycles of scalar variables, by
810 analyzing the loop-header PHIs of scalar variables; Classify each
811 cycle as one of the following: invariant, induction, reduction, unknown.
812 We do that for the loop represented by LOOP_VINFO, and also to its
813 inner-loop, if exists.
814 Examples for scalar cycles:
829 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
831 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
833 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
835 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
836 Reductions in such inner-loop therefore have different properties than
837 the reductions in the nest that gets vectorized:
838 1. When vectorized, they are executed in the same order as in the original
839 scalar loop, so we can't change the order of computation when
841 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
842 current checks are too strict. */
845 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
849 /* Function vect_insert_into_interleaving_chain.
851 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
854 vect_insert_into_interleaving_chain (struct data_reference *dra,
855 struct data_reference *drb)
857 tree prev, next, next_init;
858 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
859 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
861 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
862 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
865 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
866 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
869 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
870 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
874 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
877 /* We got to the end of the list. Insert here. */
878 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
879 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
883 /* Function vect_update_interleaving_chain.
885 For two data-refs DRA and DRB that are a part of a chain interleaved data
886 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
888 There are four possible cases:
889 1. New stmts - both DRA and DRB are not a part of any chain:
892 2. DRB is a part of a chain and DRA is not:
893 no need to update FIRST_DR
894 no need to insert DRB
895 insert DRA according to init
896 3. DRA is a part of a chain and DRB is not:
897 if (init of FIRST_DR > init of DRB)
899 NEXT(FIRST_DR) = previous FIRST_DR
901 insert DRB according to its init
902 4. both DRA and DRB are in some interleaving chains:
903 choose the chain with the smallest init of FIRST_DR
904 insert the nodes of the second chain into the first one. */
907 vect_update_interleaving_chain (struct data_reference *drb,
908 struct data_reference *dra)
910 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
911 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
912 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
913 tree node, prev, next, node_init, first_stmt;
915 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
916 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
918 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
919 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
920 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
924 /* 2. DRB is a part of a chain and DRA is not. */
925 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
927 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
928 /* Insert DRA into the chain of DRB. */
929 vect_insert_into_interleaving_chain (dra, drb);
933 /* 3. DRA is a part of a chain and DRB is not. */
934 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
936 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
937 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
941 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
943 /* DRB's init is smaller than the init of the stmt previously marked
944 as the first stmt of the interleaving chain of DRA. Therefore, we
945 update FIRST_STMT and put DRB in the head of the list. */
946 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
947 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
949 /* Update all the stmts in the list to point to the new FIRST_STMT. */
950 tmp = old_first_stmt;
953 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
954 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
959 /* Insert DRB in the list of DRA. */
960 vect_insert_into_interleaving_chain (drb, dra);
961 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
966 /* 4. both DRA and DRB are in some interleaving chains. */
967 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
968 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
969 if (first_a == first_b)
971 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
972 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
974 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
976 /* Insert the nodes of DRA chain into the DRB chain.
977 After inserting a node, continue from this node of the DRB chain (don't
978 start from the beginning. */
979 node = DR_GROUP_FIRST_DR (stmtinfo_a);
980 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
981 first_stmt = first_b;
985 /* Insert the nodes of DRB chain into the DRA chain.
986 After inserting a node, continue from this node of the DRA chain (don't
987 start from the beginning. */
988 node = DR_GROUP_FIRST_DR (stmtinfo_b);
989 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
990 first_stmt = first_a;
995 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
996 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
999 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1000 if (tree_int_cst_compare (next_init, node_init) > 0)
1003 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
1004 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
1009 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
1013 /* We got to the end of the list. Insert here. */
1014 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
1015 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
1018 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
1019 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
1024 /* Function vect_equal_offsets.
1026 Check if OFFSET1 and OFFSET2 are identical expressions. */
1029 vect_equal_offsets (tree offset1, tree offset2)
1033 STRIP_NOPS (offset1);
1034 STRIP_NOPS (offset2);
1036 if (offset1 == offset2)
1039 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1040 || !BINARY_CLASS_P (offset1)
1041 || !BINARY_CLASS_P (offset2))
1044 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1045 TREE_OPERAND (offset2, 0));
1046 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1047 TREE_OPERAND (offset2, 1));
1049 return (res0 && res1);
1053 /* Function vect_check_interleaving.
1055 Check if DRA and DRB are a part of interleaving. In case they are, insert
1056 DRA and DRB in an interleaving chain. */
1059 vect_check_interleaving (struct data_reference *dra,
1060 struct data_reference *drb)
1062 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1064 /* Check that the data-refs have same first location (except init) and they
1065 are both either store or load (not load and store). */
1066 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1067 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1068 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1069 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1070 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1071 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1072 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1073 || DR_IS_READ (dra) != DR_IS_READ (drb))
1077 1. data-refs are of the same type
1078 2. their steps are equal
1079 3. the step is greater than the difference between data-refs' inits */
1080 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1081 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
1083 if (type_size_a != type_size_b
1084 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
1087 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1088 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1089 step = TREE_INT_CST_LOW (DR_STEP (dra));
1091 if (init_a > init_b)
1093 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1094 and DRB is accessed before DRA. */
1095 diff_mod_size = (init_a - init_b) % type_size_a;
1097 if ((init_a - init_b) > step)
1100 if (diff_mod_size == 0)
1102 vect_update_interleaving_chain (drb, dra);
1103 if (vect_print_dump_info (REPORT_DR_DETAILS))
1105 fprintf (vect_dump, "Detected interleaving ");
1106 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1107 fprintf (vect_dump, " and ");
1108 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1115 /* If init_b == init_a + the size of the type * k, we have an
1116 interleaving, and DRA is accessed before DRB. */
1117 diff_mod_size = (init_b - init_a) % type_size_a;
1119 if ((init_b - init_a) > step)
1122 if (diff_mod_size == 0)
1124 vect_update_interleaving_chain (dra, drb);
1125 if (vect_print_dump_info (REPORT_DR_DETAILS))
1127 fprintf (vect_dump, "Detected interleaving ");
1128 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1129 fprintf (vect_dump, " and ");
1130 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1137 /* Check if data references pointed by DR_I and DR_J are same or
1138 belong to same interleaving group. Return FALSE if drs are
1139 different, otherwise return TRUE. */
1142 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
1144 tree stmt_i = DR_STMT (dr_i);
1145 tree stmt_j = DR_STMT (dr_j);
1147 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
1148 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1149 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
1150 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1151 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
1157 /* If address ranges represented by DDR_I and DDR_J are equal,
1158 return TRUE, otherwise return FALSE. */
1161 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
1163 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
1164 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
1165 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
1166 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
1172 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
1173 tested at run-time. Return TRUE if DDR was successfully inserted.
1174 Return false if versioning is not supported. */
1177 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1179 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1181 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
1184 if (vect_print_dump_info (REPORT_DR_DETAILS))
1186 fprintf (vect_dump, "mark for run-time aliasing test between ");
1187 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1188 fprintf (vect_dump, " and ");
1189 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
1194 if (vect_print_dump_info (REPORT_DR_DETAILS))
1195 fprintf (vect_dump, "versioning not supported when optimizing for size.");
1199 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1202 if (vect_print_dump_info (REPORT_DR_DETAILS))
1203 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
1207 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1211 /* Function vect_analyze_data_ref_dependence.
1213 Return TRUE if there (might) exist a dependence between a memory-reference
1214 DRA and a memory-reference DRB. When versioning for alias may check a
1215 dependence at run-time, return FALSE. */
1218 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1219 loop_vec_info loop_vinfo)
1222 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1223 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1224 struct data_reference *dra = DDR_A (ddr);
1225 struct data_reference *drb = DDR_B (ddr);
1226 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1227 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1228 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1229 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1230 lambda_vector dist_v;
1231 unsigned int loop_depth;
1233 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1235 /* Independent data accesses. */
1236 vect_check_interleaving (dra, drb);
1240 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1243 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1245 if (vect_print_dump_info (REPORT_DR_DETAILS))
1248 "versioning for alias required: can't determine dependence between ");
1249 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1250 fprintf (vect_dump, " and ");
1251 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1253 /* Add to list of ddrs that need to be tested at run-time. */
1254 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1257 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1259 if (vect_print_dump_info (REPORT_DR_DETAILS))
1261 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1262 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1263 fprintf (vect_dump, " and ");
1264 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1266 /* Add to list of ddrs that need to be tested at run-time. */
1267 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
1270 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1271 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1273 int dist = dist_v[loop_depth];
1275 if (vect_print_dump_info (REPORT_DR_DETAILS))
1276 fprintf (vect_dump, "dependence distance = %d.", dist);
1278 /* Same loop iteration. */
1279 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1281 /* Two references with distance zero have the same alignment. */
1282 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1283 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1284 if (vect_print_dump_info (REPORT_ALIGNMENT))
1285 fprintf (vect_dump, "accesses have the same alignment.");
1286 if (vect_print_dump_info (REPORT_DR_DETAILS))
1288 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1289 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1290 fprintf (vect_dump, " and ");
1291 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1294 /* For interleaving, mark that there is a read-write dependency if
1295 necessary. We check before that one of the data-refs is store. */
1296 if (DR_IS_READ (dra))
1297 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1300 if (DR_IS_READ (drb))
1301 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1307 if (abs (dist) >= vectorization_factor
1308 || (dist > 0 && DDR_REVERSED_P (ddr)))
1310 /* Dependence distance does not create dependence, as far as
1311 vectorization is concerned, in this case. If DDR_REVERSED_P the
1312 order of the data-refs in DDR was reversed (to make distance
1313 vector positive), and the actual distance is negative. */
1314 if (vect_print_dump_info (REPORT_DR_DETAILS))
1315 fprintf (vect_dump, "dependence distance >= VF or negative.");
1319 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1322 "not vectorized, possible dependence "
1323 "between data-refs ");
1324 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1325 fprintf (vect_dump, " and ");
1326 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1335 /* Function vect_analyze_data_ref_dependences.
1337 Examine all the data references in the loop, and make sure there do not
1338 exist any data dependences between them. */
1341 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1344 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1345 struct data_dependence_relation *ddr;
1347 if (vect_print_dump_info (REPORT_DETAILS))
1348 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1350 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1351 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1358 /* Function vect_compute_data_ref_alignment
1360 Compute the misalignment of the data reference DR.
1363 1. If during the misalignment computation it is found that the data reference
1364 cannot be vectorized then false is returned.
1365 2. DR_MISALIGNMENT (DR) is defined.
1367 FOR NOW: No analysis is actually performed. Misalignment is calculated
1368 only for trivial cases. TODO. */
1371 vect_compute_data_ref_alignment (struct data_reference *dr)
1373 tree stmt = DR_STMT (dr);
1374 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1375 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1376 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1377 tree ref = DR_REF (dr);
1379 tree base, base_addr;
1382 tree aligned_to, alignment;
1384 if (vect_print_dump_info (REPORT_DETAILS))
1385 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1387 /* Initialize misalignment to unknown. */
1388 SET_DR_MISALIGNMENT (dr, -1);
1390 misalign = DR_INIT (dr);
1391 aligned_to = DR_ALIGNED_TO (dr);
1392 base_addr = DR_BASE_ADDRESS (dr);
1394 /* In case the dataref is in an inner-loop of the loop that is being
1395 vectorized (LOOP), we use the base and misalignment information
1396 relative to the outer-loop (LOOP). This is ok only if the misalignment
1397 stays the same throughout the execution of the inner-loop, which is why
1398 we have to check that the stride of the dataref in the inner-loop evenly
1399 divides by the vector size. */
1400 if (nested_in_vect_loop_p (loop, stmt))
1402 tree step = DR_STEP (dr);
1403 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1405 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1407 if (vect_print_dump_info (REPORT_ALIGNMENT))
1408 fprintf (vect_dump, "inner step divides the vector-size.");
1409 misalign = STMT_VINFO_DR_INIT (stmt_info);
1410 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1411 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
1415 if (vect_print_dump_info (REPORT_ALIGNMENT))
1416 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1417 misalign = NULL_TREE;
1421 base = build_fold_indirect_ref (base_addr);
1422 vectype = STMT_VINFO_VECTYPE (stmt_info);
1423 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1425 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1428 if (vect_print_dump_info (REPORT_ALIGNMENT))
1430 fprintf (vect_dump, "Unknown alignment for access: ");
1431 print_generic_expr (vect_dump, base, TDF_SLIM);
1437 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1439 || (TREE_CODE (base_addr) == SSA_NAME
1440 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1441 TREE_TYPE (base_addr)))),
1443 base_aligned = true;
1445 base_aligned = false;
1449 /* Do not change the alignment of global variables if
1450 flag_section_anchors is enabled. */
1451 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1452 || (TREE_STATIC (base) && flag_section_anchors))
1454 if (vect_print_dump_info (REPORT_DETAILS))
1456 fprintf (vect_dump, "can't force alignment of ref: ");
1457 print_generic_expr (vect_dump, ref, TDF_SLIM);
1462 /* Force the alignment of the decl.
1463 NOTE: This is the only change to the code we make during
1464 the analysis phase, before deciding to vectorize the loop. */
1465 if (vect_print_dump_info (REPORT_DETAILS))
1466 fprintf (vect_dump, "force alignment");
1467 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1468 DECL_USER_ALIGN (base) = 1;
1471 /* At this point we assume that the base is aligned. */
1472 gcc_assert (base_aligned
1473 || (TREE_CODE (base) == VAR_DECL
1474 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1476 /* Modulo alignment. */
1477 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1479 if (!host_integerp (misalign, 1))
1481 /* Negative or overflowed misalignment value. */
1482 if (vect_print_dump_info (REPORT_DETAILS))
1483 fprintf (vect_dump, "unexpected misalign value");
1487 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1489 if (vect_print_dump_info (REPORT_DETAILS))
1491 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1492 print_generic_expr (vect_dump, ref, TDF_SLIM);
1499 /* Function vect_compute_data_refs_alignment
1501 Compute the misalignment of data references in the loop.
1502 Return FALSE if a data reference is found that cannot be vectorized. */
1505 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1507 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1508 struct data_reference *dr;
1511 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1512 if (!vect_compute_data_ref_alignment (dr))
1519 /* Function vect_update_misalignment_for_peel
1521 DR - the data reference whose misalignment is to be adjusted.
1522 DR_PEEL - the data reference whose misalignment is being made
1523 zero in the vector loop by the peel.
1524 NPEEL - the number of iterations in the peel loop if the misalignment
1525 of DR_PEEL is known at compile time. */
1528 vect_update_misalignment_for_peel (struct data_reference *dr,
1529 struct data_reference *dr_peel, int npeel)
1532 VEC(dr_p,heap) *same_align_drs;
1533 struct data_reference *current_dr;
1534 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1535 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1536 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1537 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1539 /* For interleaved data accesses the step in the loop must be multiplied by
1540 the size of the interleaving group. */
1541 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1542 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1543 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1544 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1546 /* It can be assumed that the data refs with the same alignment as dr_peel
1547 are aligned in the vector loop. */
1549 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1550 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1552 if (current_dr != dr)
1554 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1555 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1556 SET_DR_MISALIGNMENT (dr, 0);
1560 if (known_alignment_for_access_p (dr)
1561 && known_alignment_for_access_p (dr_peel))
1563 int misal = DR_MISALIGNMENT (dr);
1564 misal += npeel * dr_size;
1565 misal %= UNITS_PER_SIMD_WORD;
1566 SET_DR_MISALIGNMENT (dr, misal);
1570 if (vect_print_dump_info (REPORT_DETAILS))
1571 fprintf (vect_dump, "Setting misalignment to -1.");
1572 SET_DR_MISALIGNMENT (dr, -1);
1576 /* Function vect_verify_datarefs_alignment
1578 Return TRUE if all data references in the loop can be
1579 handled with respect to alignment. */
1582 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1584 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1585 struct data_reference *dr;
1586 enum dr_alignment_support supportable_dr_alignment;
1589 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1591 tree stmt = DR_STMT (dr);
1592 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1594 /* For interleaving, only the alignment of the first access matters. */
1595 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1596 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1599 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1600 if (!supportable_dr_alignment)
1602 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1604 if (DR_IS_READ (dr))
1606 "not vectorized: unsupported unaligned load.");
1609 "not vectorized: unsupported unaligned store.");
1613 if (supportable_dr_alignment != dr_aligned
1614 && vect_print_dump_info (REPORT_ALIGNMENT))
1615 fprintf (vect_dump, "Vectorizing an unaligned access.");
1621 /* Function vector_alignment_reachable_p
1623 Return true if vector alignment for DR is reachable by peeling
1624 a few loop iterations. Return false otherwise. */
1627 vector_alignment_reachable_p (struct data_reference *dr)
1629 tree stmt = DR_STMT (dr);
1630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1631 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1633 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1635 /* For interleaved access we peel only if number of iterations in
1636 the prolog loop ({VF - misalignment}), is a multiple of the
1637 number of the interleaved accesses. */
1638 int elem_size, mis_in_elements;
1639 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1641 /* FORNOW: handle only known alignment. */
1642 if (!known_alignment_for_access_p (dr))
1645 elem_size = UNITS_PER_SIMD_WORD / nelements;
1646 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1648 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1652 /* If misalignment is known at the compile time then allow peeling
1653 only if natural alignment is reachable through peeling. */
1654 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1656 HOST_WIDE_INT elmsize =
1657 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1658 if (vect_print_dump_info (REPORT_DETAILS))
1660 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1661 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1663 if (DR_MISALIGNMENT (dr) % elmsize)
1665 if (vect_print_dump_info (REPORT_DETAILS))
1666 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1671 if (!known_alignment_for_access_p (dr))
1673 tree type = (TREE_TYPE (DR_REF (dr)));
1674 tree ba = DR_BASE_OBJECT (dr);
1675 bool is_packed = false;
1678 is_packed = contains_packed_reference (ba);
1680 if (vect_print_dump_info (REPORT_DETAILS))
1681 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1682 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1691 /* Function vect_enhance_data_refs_alignment
1693 This pass will use loop versioning and loop peeling in order to enhance
1694 the alignment of data references in the loop.
1696 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1697 original loop is to be vectorized; Any other loops that are created by
1698 the transformations performed in this pass - are not supposed to be
1699 vectorized. This restriction will be relaxed.
1701 This pass will require a cost model to guide it whether to apply peeling
1702 or versioning or a combination of the two. For example, the scheme that
1703 intel uses when given a loop with several memory accesses, is as follows:
1704 choose one memory access ('p') which alignment you want to force by doing
1705 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1706 other accesses are not necessarily aligned, or (2) use loop versioning to
1707 generate one loop in which all accesses are aligned, and another loop in
1708 which only 'p' is necessarily aligned.
1710 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1711 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1712 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1714 Devising a cost model is the most critical aspect of this work. It will
1715 guide us on which access to peel for, whether to use loop versioning, how
1716 many versions to create, etc. The cost model will probably consist of
1717 generic considerations as well as target specific considerations (on
1718 powerpc for example, misaligned stores are more painful than misaligned
1721 Here are the general steps involved in alignment enhancements:
1723 -- original loop, before alignment analysis:
1724 for (i=0; i<N; i++){
1725 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1726 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1729 -- After vect_compute_data_refs_alignment:
1730 for (i=0; i<N; i++){
1731 x = q[i]; # DR_MISALIGNMENT(q) = 3
1732 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1735 -- Possibility 1: we do loop versioning:
1737 for (i=0; i<N; i++){ # loop 1A
1738 x = q[i]; # DR_MISALIGNMENT(q) = 3
1739 p[i] = y; # DR_MISALIGNMENT(p) = 0
1743 for (i=0; i<N; i++){ # loop 1B
1744 x = q[i]; # DR_MISALIGNMENT(q) = 3
1745 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1749 -- Possibility 2: we do loop peeling:
1750 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1754 for (i = 3; i < N; i++){ # loop 2A
1755 x = q[i]; # DR_MISALIGNMENT(q) = 0
1756 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1759 -- Possibility 3: combination of loop peeling and versioning:
1760 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1765 for (i = 3; i<N; i++){ # loop 3A
1766 x = q[i]; # DR_MISALIGNMENT(q) = 0
1767 p[i] = y; # DR_MISALIGNMENT(p) = 0
1771 for (i = 3; i<N; i++){ # loop 3B
1772 x = q[i]; # DR_MISALIGNMENT(q) = 0
1773 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1777 These loops are later passed to loop_transform to be vectorized. The
1778 vectorizer will use the alignment information to guide the transformation
1779 (whether to generate regular loads/stores, or with special handling for
1783 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1785 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1786 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1787 enum dr_alignment_support supportable_dr_alignment;
1788 struct data_reference *dr0 = NULL;
1789 struct data_reference *dr;
1791 bool do_peeling = false;
1792 bool do_versioning = false;
1795 stmt_vec_info stmt_info;
1796 int vect_versioning_for_alias_required;
1798 if (vect_print_dump_info (REPORT_DETAILS))
1799 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1801 /* While cost model enhancements are expected in the future, the high level
1802 view of the code at this time is as follows:
1804 A) If there is a misaligned write then see if peeling to align this write
1805 can make all data references satisfy vect_supportable_dr_alignment.
1806 If so, update data structures as needed and return true. Note that
1807 at this time vect_supportable_dr_alignment is known to return false
1808 for a misaligned write.
1810 B) If peeling wasn't possible and there is a data reference with an
1811 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1812 then see if loop versioning checks can be used to make all data
1813 references satisfy vect_supportable_dr_alignment. If so, update
1814 data structures as needed and return true.
1816 C) If neither peeling nor versioning were successful then return false if
1817 any data reference does not satisfy vect_supportable_dr_alignment.
1819 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1821 Note, Possibility 3 above (which is peeling and versioning together) is not
1822 being done at this time. */
1824 /* (1) Peeling to force alignment. */
1826 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1828 + How many accesses will become aligned due to the peeling
1829 - How many accesses will become unaligned due to the peeling,
1830 and the cost of misaligned accesses.
1831 - The cost of peeling (the extra runtime checks, the increase
1834 The scheme we use FORNOW: peel to force the alignment of the first
1835 misaligned store in the loop.
1836 Rationale: misaligned stores are not yet supported.
1838 TODO: Use a cost model. */
1840 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1842 stmt = DR_STMT (dr);
1843 stmt_info = vinfo_for_stmt (stmt);
1845 /* For interleaving, only the alignment of the first access
1847 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1848 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1851 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1853 do_peeling = vector_alignment_reachable_p (dr);
1856 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1857 fprintf (vect_dump, "vector alignment may not be reachable");
1862 vect_versioning_for_alias_required =
1863 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1865 /* Temporarily, if versioning for alias is required, we disable peeling
1866 until we support peeling and versioning. Often peeling for alignment
1867 will require peeling for loop-bound, which in turn requires that we
1868 know how to adjust the loop ivs after the loop. */
1869 if (vect_versioning_for_alias_required
1870 || !vect_can_advance_ivs_p (loop_vinfo)
1871 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1878 tree stmt = DR_STMT (dr0);
1879 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1880 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1881 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1883 if (known_alignment_for_access_p (dr0))
1885 /* Since it's known at compile time, compute the number of iterations
1886 in the peeled loop (the peeling factor) for use in updating
1887 DR_MISALIGNMENT values. The peeling factor is the vectorization
1888 factor minus the misalignment as an element count. */
1889 mis = DR_MISALIGNMENT (dr0);
1890 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1891 npeel = nelements - mis;
1893 /* For interleaved data access every iteration accesses all the
1894 members of the group, therefore we divide the number of iterations
1895 by the group size. */
1896 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1897 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1898 npeel /= DR_GROUP_SIZE (stmt_info);
1900 if (vect_print_dump_info (REPORT_DETAILS))
1901 fprintf (vect_dump, "Try peeling by %d", npeel);
1904 /* Ensure that all data refs can be vectorized after the peel. */
1905 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1907 int save_misalignment;
1912 stmt = DR_STMT (dr);
1913 stmt_info = vinfo_for_stmt (stmt);
1914 /* For interleaving, only the alignment of the first access
1916 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1917 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1920 save_misalignment = DR_MISALIGNMENT (dr);
1921 vect_update_misalignment_for_peel (dr, dr0, npeel);
1922 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1923 SET_DR_MISALIGNMENT (dr, save_misalignment);
1925 if (!supportable_dr_alignment)
1934 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1935 If the misalignment of DR_i is identical to that of dr0 then set
1936 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1937 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1938 by the peeling factor times the element size of DR_i (MOD the
1939 vectorization factor times the size). Otherwise, the
1940 misalignment of DR_i must be set to unknown. */
1941 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1943 vect_update_misalignment_for_peel (dr, dr0, npeel);
1945 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1946 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1947 SET_DR_MISALIGNMENT (dr0, 0);
1948 if (vect_print_dump_info (REPORT_ALIGNMENT))
1949 fprintf (vect_dump, "Alignment of access forced using peeling.");
1951 if (vect_print_dump_info (REPORT_DETAILS))
1952 fprintf (vect_dump, "Peeling for alignment will be applied.");
1954 stat = vect_verify_datarefs_alignment (loop_vinfo);
1961 /* (2) Versioning to force alignment. */
1963 /* Try versioning if:
1964 1) flag_tree_vect_loop_version is TRUE
1965 2) optimize_size is FALSE
1966 3) there is at least one unsupported misaligned data ref with an unknown
1968 4) all misaligned data refs with a known misalignment are supported, and
1969 5) the number of runtime alignment checks is within reason. */
1972 flag_tree_vect_loop_version
1974 && (!loop->inner); /* FORNOW */
1978 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1980 stmt = DR_STMT (dr);
1981 stmt_info = vinfo_for_stmt (stmt);
1983 /* For interleaving, only the alignment of the first access
1985 if (aligned_access_p (dr)
1986 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1987 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1990 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1992 if (!supportable_dr_alignment)
1998 if (known_alignment_for_access_p (dr)
1999 || VEC_length (tree,
2000 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
2001 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2003 do_versioning = false;
2007 stmt = DR_STMT (dr);
2008 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2009 gcc_assert (vectype);
2011 /* The rightmost bits of an aligned address must be zeros.
2012 Construct the mask needed for this test. For example,
2013 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2014 mask must be 15 = 0xf. */
2015 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
2017 /* FORNOW: use the same mask to test all potentially unaligned
2018 references in the loop. The vectorizer currently supports
2019 a single vector size, see the reference to
2020 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2021 vectorization factor is computed. */
2022 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2023 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2024 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2025 VEC_safe_push (tree, heap,
2026 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2031 /* Versioning requires at least one misaligned data reference. */
2032 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2033 do_versioning = false;
2034 else if (!do_versioning)
2035 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
2040 VEC(tree,heap) *may_misalign_stmts
2041 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2044 /* It can now be assumed that the data references in the statements
2045 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2046 of the loop being vectorized. */
2047 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2049 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2050 dr = STMT_VINFO_DATA_REF (stmt_info);
2051 SET_DR_MISALIGNMENT (dr, 0);
2052 if (vect_print_dump_info (REPORT_ALIGNMENT))
2053 fprintf (vect_dump, "Alignment of access forced using versioning.");
2056 if (vect_print_dump_info (REPORT_DETAILS))
2057 fprintf (vect_dump, "Versioning for alignment will be applied.");
2059 /* Peeling and versioning can't be done together at this time. */
2060 gcc_assert (! (do_peeling && do_versioning));
2062 stat = vect_verify_datarefs_alignment (loop_vinfo);
2067 /* This point is reached if neither peeling nor versioning is being done. */
2068 gcc_assert (! (do_peeling || do_versioning));
2070 stat = vect_verify_datarefs_alignment (loop_vinfo);
2075 /* Function vect_analyze_data_refs_alignment
2077 Analyze the alignment of the data-references in the loop.
2078 Return FALSE if a data reference is found that cannot be vectorized. */
2081 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2083 if (vect_print_dump_info (REPORT_DETAILS))
2084 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2086 if (!vect_compute_data_refs_alignment (loop_vinfo))
2088 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2090 "not vectorized: can't calculate alignment for data ref.");
2098 /* Analyze groups of strided accesses: check that DR belongs to a group of
2099 strided accesses of legal size, step, etc. Detect gaps, single element
2100 interleaving, and other special cases. Set strided access info.
2101 Collect groups of strided stores for further use in SLP analysis. */
2104 vect_analyze_group_access (struct data_reference *dr)
2106 tree step = DR_STEP (dr);
2107 tree scalar_type = TREE_TYPE (DR_REF (dr));
2108 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2109 tree stmt = DR_STMT (dr);
2110 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2111 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2112 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2113 HOST_WIDE_INT stride;
2114 bool slp_impossible = false;
2116 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2117 interleaving group (including gaps). */
2118 stride = dr_step / type_size;
2120 /* Not consecutive access is possible only if it is a part of interleaving. */
2121 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2123 /* Check if it this DR is a part of interleaving, and is a single
2124 element of the group that is accessed in the loop. */
2126 /* Gaps are supported only for loads. STEP must be a multiple of the type
2127 size. The size of the group must be a power of 2. */
2129 && (dr_step % type_size) == 0
2131 && exact_log2 (stride) != -1)
2133 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2134 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2135 if (vect_print_dump_info (REPORT_DR_DETAILS))
2137 fprintf (vect_dump, "Detected single element interleaving %d ",
2138 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2139 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2140 fprintf (vect_dump, " step ");
2141 print_generic_expr (vect_dump, step, TDF_SLIM);
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "not consecutive access");
2150 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2152 /* First stmt in the interleaving chain. Check the chain. */
2153 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2154 struct data_reference *data_ref = dr;
2155 unsigned int count = 1;
2157 tree prev_init = DR_INIT (data_ref);
2159 HOST_WIDE_INT diff, count_in_bytes;
2163 /* Skip same data-refs. In case that two or more stmts share data-ref
2164 (supported only for loads), we vectorize only the first stmt, and
2165 the rest get their vectorized loads from the first one. */
2166 if (!tree_int_cst_compare (DR_INIT (data_ref),
2167 DR_INIT (STMT_VINFO_DATA_REF (
2168 vinfo_for_stmt (next)))))
2170 if (!DR_IS_READ (data_ref))
2172 if (vect_print_dump_info (REPORT_DETAILS))
2173 fprintf (vect_dump, "Two store stmts share the same dr.");
2177 /* Check that there is no load-store dependencies for this loads
2178 to prevent a case of load-store-load to the same location. */
2179 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2180 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2182 if (vect_print_dump_info (REPORT_DETAILS))
2184 "READ_WRITE dependence in interleaving.");
2188 /* For load use the same data-ref load. */
2189 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2192 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2197 /* Check that all the accesses have the same STEP. */
2198 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2199 if (tree_int_cst_compare (step, next_step))
2201 if (vect_print_dump_info (REPORT_DETAILS))
2202 fprintf (vect_dump, "not consecutive access in interleaving");
2206 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2207 /* Check that the distance between two accesses is equal to the type
2208 size. Otherwise, we have gaps. */
2209 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2210 - TREE_INT_CST_LOW (prev_init)) / type_size;
2213 /* FORNOW: SLP of accesses with gaps is not supported. */
2214 slp_impossible = true;
2215 if (!DR_IS_READ (data_ref))
2217 if (vect_print_dump_info (REPORT_DETAILS))
2218 fprintf (vect_dump, "interleaved store with gaps");
2223 /* Store the gap from the previous member of the group. If there is no
2224 gap in the access, DR_GROUP_GAP is always 1. */
2225 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2227 prev_init = DR_INIT (data_ref);
2228 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2229 /* Count the number of data-refs in the chain. */
2233 /* COUNT is the number of accesses found, we multiply it by the size of
2234 the type to get COUNT_IN_BYTES. */
2235 count_in_bytes = type_size * count;
2237 /* Check that the size of the interleaving is not greater than STEP. */
2238 if (dr_step < count_in_bytes)
2240 if (vect_print_dump_info (REPORT_DETAILS))
2242 fprintf (vect_dump, "interleaving size is greater than step for ");
2243 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2248 /* Check that the size of the interleaving is equal to STEP for stores,
2249 i.e., that there are no gaps. */
2250 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
2252 if (vect_print_dump_info (REPORT_DETAILS))
2253 fprintf (vect_dump, "interleaved store with gaps");
2257 /* Check that STEP is a multiple of type size. */
2258 if ((dr_step % type_size) != 0)
2260 if (vect_print_dump_info (REPORT_DETAILS))
2262 fprintf (vect_dump, "step is not a multiple of type size: step ");
2263 print_generic_expr (vect_dump, step, TDF_SLIM);
2264 fprintf (vect_dump, " size ");
2265 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2271 /* FORNOW: we handle only interleaving that is a power of 2.
2272 We don't fail here if it may be still possible to vectorize the
2273 group using SLP. If not, the size of the group will be checked in
2274 vect_analyze_operations, and the vectorization will fail. */
2275 if (exact_log2 (stride) == -1)
2277 if (vect_print_dump_info (REPORT_DETAILS))
2278 fprintf (vect_dump, "interleaving is not a power of 2");
2283 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2284 if (vect_print_dump_info (REPORT_DETAILS))
2285 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2287 /* SLP: create an SLP data structure for every interleaving group of
2288 stores for further analysis in vect_analyse_slp. */
2289 if (!DR_IS_READ (dr) && !slp_impossible)
2290 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2297 /* Analyze the access pattern of the data-reference DR.
2298 In case of non-consecutive accesses call vect_analyze_group_access() to
2299 analyze groups of strided accesses. */
2302 vect_analyze_data_ref_access (struct data_reference *dr)
2304 tree step = DR_STEP (dr);
2305 tree scalar_type = TREE_TYPE (DR_REF (dr));
2306 tree stmt = DR_STMT (dr);
2307 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2308 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2309 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2310 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2314 if (vect_print_dump_info (REPORT_DETAILS))
2315 fprintf (vect_dump, "bad data-ref access");
2319 /* Don't allow invariant accesses. */
2323 if (nested_in_vect_loop_p (loop, stmt))
2325 /* For the rest of the analysis we use the outer-loop step. */
2326 step = STMT_VINFO_DR_STEP (stmt_info);
2327 dr_step = TREE_INT_CST_LOW (step);
2331 if (vect_print_dump_info (REPORT_ALIGNMENT))
2332 fprintf (vect_dump, "zero step in outer loop.");
2333 if (DR_IS_READ (dr))
2341 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2343 /* Mark that it is not interleaving. */
2344 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2348 if (nested_in_vect_loop_p (loop, stmt))
2350 if (vect_print_dump_info (REPORT_ALIGNMENT))
2351 fprintf (vect_dump, "strided access in outer loop.");
2355 /* Not consecutive access - check if it's a part of interleaving group. */
2356 return vect_analyze_group_access (dr);
2360 /* Function vect_analyze_data_ref_accesses.
2362 Analyze the access pattern of all the data references in the loop.
2364 FORNOW: the only access pattern that is considered vectorizable is a
2365 simple step 1 (consecutive) access.
2367 FORNOW: handle only arrays and pointer accesses. */
2370 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2373 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2374 struct data_reference *dr;
2376 if (vect_print_dump_info (REPORT_DETAILS))
2377 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2379 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2380 if (!vect_analyze_data_ref_access (dr))
2382 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2383 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2390 /* Function vect_prune_runtime_alias_test_list.
2392 Prune a list of ddrs to be tested at run-time by versioning for alias.
2393 Return FALSE if resulting list of ddrs is longer then allowed by
2394 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2397 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2399 VEC (ddr_p, heap) * ddrs =
2400 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2403 if (vect_print_dump_info (REPORT_DETAILS))
2404 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2406 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2411 ddr_i = VEC_index (ddr_p, ddrs, i);
2414 for (j = 0; j < i; j++)
2416 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2418 if (vect_vfa_range_equal (ddr_i, ddr_j))
2420 if (vect_print_dump_info (REPORT_DR_DETAILS))
2422 fprintf (vect_dump, "found equal ranges ");
2423 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2424 fprintf (vect_dump, ", ");
2425 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2426 fprintf (vect_dump, " and ");
2427 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2428 fprintf (vect_dump, ", ");
2429 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2438 VEC_ordered_remove (ddr_p, ddrs, i);
2444 if (VEC_length (ddr_p, ddrs) >
2445 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2447 if (vect_print_dump_info (REPORT_DR_DETAILS))
2450 "disable versioning for alias - max number of generated "
2451 "checks exceeded.");
2454 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2462 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2465 vect_free_slp_tree (slp_tree node)
2470 if (SLP_TREE_LEFT (node))
2471 vect_free_slp_tree (SLP_TREE_LEFT (node));
2473 if (SLP_TREE_RIGHT (node))
2474 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2476 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2478 if (SLP_TREE_VEC_STMTS (node))
2479 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2485 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2486 of a legal type and that they match the defs of the first stmt of the SLP
2487 group (stored in FIRST_STMT_...). */
2490 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2491 tree rhs, VEC (tree, heap) **def_stmts0,
2492 VEC (tree, heap) **def_stmts1,
2493 enum vect_def_type *first_stmt_dt0,
2494 enum vect_def_type *first_stmt_dt1,
2495 tree *first_stmt_def0_type,
2496 tree *first_stmt_def1_type,
2497 tree *first_stmt_const_oprnd,
2498 int ncopies_for_cost)
2501 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2502 unsigned int i, number_of_oprnds = op_type;
2504 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2505 stmt_vec_info stmt_info =
2506 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
2510 number_of_oprnds = 1;
2512 gcc_assert (op_type == unary_op || op_type == binary_op);
2514 for (i = 0; i < number_of_oprnds; i++)
2517 oprnd = TREE_OPERAND (rhs, i);
2521 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2522 || (!def_stmt && dt[i] != vect_constant_def))
2524 if (vect_print_dump_info (REPORT_SLP))
2526 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2527 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
2533 if (!*first_stmt_dt0)
2535 /* op0 of the first stmt of the group - store its info. */
2536 *first_stmt_dt0 = dt[i];
2538 *first_stmt_def0_type = TREE_TYPE (def);
2540 *first_stmt_const_oprnd = oprnd;
2542 /* Analyze costs (for the first stmt of the group only). */
2544 /* Not memory operation (we don't call this functions for loads). */
2545 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2548 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2553 if (!*first_stmt_dt1 && i == 1)
2555 /* op1 of the first stmt of the group - store its info. */
2556 *first_stmt_dt1 = dt[i];
2558 *first_stmt_def1_type = TREE_TYPE (def);
2561 /* We assume that the stmt contains only one constant
2562 operand. We fail otherwise, to be on the safe side. */
2563 if (*first_stmt_const_oprnd)
2565 if (vect_print_dump_info (REPORT_SLP))
2566 fprintf (vect_dump, "Build SLP failed: two constant "
2570 *first_stmt_const_oprnd = oprnd;
2575 /* Not first stmt of the group, check that the def-stmt/s match
2576 the def-stmt/s of the first stmt. */
2578 && (*first_stmt_dt0 != dt[i]
2579 || (*first_stmt_def0_type && def
2580 && *first_stmt_def0_type != TREE_TYPE (def))))
2582 && (*first_stmt_dt1 != dt[i]
2583 || (*first_stmt_def1_type && def
2584 && *first_stmt_def1_type != TREE_TYPE (def))))
2586 && TREE_TYPE (*first_stmt_const_oprnd)
2587 != TREE_TYPE (oprnd)))
2589 if (vect_print_dump_info (REPORT_SLP))
2590 fprintf (vect_dump, "Build SLP failed: different types ");
2597 /* Check the types of the definitions. */
2600 case vect_constant_def:
2601 case vect_invariant_def:
2606 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2608 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2612 /* FORNOW: Not supported. */
2613 if (vect_print_dump_info (REPORT_SLP))
2615 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2616 print_generic_expr (vect_dump, def, TDF_SLIM);
2627 /* Recursively build an SLP tree starting from NODE.
2628 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2629 permutation or are of unsupported types of operation. Otherwise, return
2631 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2632 in the case of multiple types for now. */
2635 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2636 unsigned int group_size, bool *slp_impossible,
2637 int *inside_cost, int *outside_cost,
2638 int ncopies_for_cost)
2640 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2641 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2643 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2644 tree stmt = VEC_index (tree, stmts, 0);
2645 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2646 enum tree_code first_stmt_code = 0;
2647 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2648 tree lhs, rhs, prev_stmt = NULL_TREE;
2649 bool stop_recursion = false, need_same_oprnds = false;
2650 tree vectype, scalar_type, first_op1 = NULL_TREE;
2651 unsigned int vectorization_factor = 0, ncopies;
2654 enum machine_mode optab_op2_mode;
2655 enum machine_mode vec_mode;
2656 tree first_stmt_const_oprnd = NULL_TREE;
2657 struct data_reference *first_dr;
2659 /* For every stmt in NODE find its def stmt/s. */
2660 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2662 if (vect_print_dump_info (REPORT_SLP))
2664 fprintf (vect_dump, "Build SLP for ");
2665 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2668 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2670 if (vect_print_dump_info (REPORT_SLP))
2672 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2673 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2679 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2680 vectype = get_vectype_for_scalar_type (scalar_type);
2681 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2682 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2683 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
2687 if (vect_print_dump_info (REPORT_SLP))
2688 fprintf (vect_dump, "SLP failed - multiple types ");
2690 *slp_impossible = true;
2694 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2695 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2697 /* Check the operation. */
2700 first_stmt_code = TREE_CODE (rhs);
2702 /* Shift arguments should be equal in all the packed stmts for a
2703 vector shift with scalar shift operand. */
2704 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2706 vec_mode = TYPE_MODE (vectype);
2707 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2710 if (vect_print_dump_info (REPORT_SLP))
2711 fprintf (vect_dump, "Build SLP failed: no optab.");
2714 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2715 if (icode == CODE_FOR_nothing)
2717 if (vect_print_dump_info (REPORT_SLP))
2719 "Build SLP failed: op not supported by target.");
2722 optab_op2_mode = insn_data[icode].operand[2].mode;
2723 if (!VECTOR_MODE_P (optab_op2_mode))
2725 need_same_oprnds = true;
2726 first_op1 = TREE_OPERAND (rhs, 1);
2732 if (first_stmt_code != TREE_CODE (rhs))
2734 if (vect_print_dump_info (REPORT_SLP))
2737 "Build SLP failed: different operation in stmt ");
2738 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2744 if (need_same_oprnds
2745 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2747 if (vect_print_dump_info (REPORT_SLP))
2750 "Build SLP failed: different shift arguments in ");
2751 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2758 /* Strided store or load. */
2759 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
2761 if (REFERENCE_CLASS_P (lhs))
2764 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2765 &def_stmts0, &def_stmts1,
2768 &first_stmt_def0_type,
2769 &first_stmt_def1_type,
2770 &first_stmt_const_oprnd,
2779 /* First stmt of the SLP group should be the first load of
2780 the interleaving loop if data permutation is not
2782 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2784 /* FORNOW: data permutations are not supported. */
2785 if (vect_print_dump_info (REPORT_SLP))
2787 fprintf (vect_dump, "Build SLP failed: strided "
2788 " loads need permutation ");
2789 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2795 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2796 if (vect_supportable_dr_alignment (first_dr)
2797 == dr_unaligned_unsupported)
2799 if (vect_print_dump_info (REPORT_SLP))
2801 fprintf (vect_dump, "Build SLP failed: unsupported "
2802 " unaligned load ");
2803 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2809 /* Analyze costs (for the first stmt in the group). */
2810 vect_model_load_cost (vinfo_for_stmt (stmt),
2811 ncopies_for_cost, *node);
2815 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2817 /* FORNOW: data permutations are not supported. */
2818 if (vect_print_dump_info (REPORT_SLP))
2820 fprintf (vect_dump, "Build SLP failed: strided "
2821 " loads need permutation ");
2822 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2830 /* We stop the tree when we reach a group of loads. */
2831 stop_recursion = true;
2834 } /* Strided access. */
2837 if (REFERENCE_CLASS_P (rhs))
2839 /* Not strided load. */
2840 if (vect_print_dump_info (REPORT_SLP))
2842 fprintf (vect_dump, "Build SLP failed: not strided load ");
2843 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2846 /* FORNOW: Not strided loads are not supported. */
2850 /* Not memory operation. */
2851 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2853 if (vect_print_dump_info (REPORT_SLP))
2855 fprintf (vect_dump, "Build SLP failed: operation");
2856 fprintf (vect_dump, " unsupported ");
2857 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2863 /* Find the def-stmts. */
2864 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2865 &def_stmts1, &first_stmt_dt0,
2867 &first_stmt_def0_type,
2868 &first_stmt_def1_type,
2869 &first_stmt_const_oprnd,
2875 /* Add the costs of the node to the overall instance costs. */
2876 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2877 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2879 /* Strided loads were reached - stop the recursion. */
2883 /* Create SLP_TREE nodes for the definition node/s. */
2884 if (first_stmt_dt0 == vect_loop_def)
2886 slp_tree left_node = XNEW (struct _slp_tree);
2887 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2888 SLP_TREE_VEC_STMTS (left_node) = NULL;
2889 SLP_TREE_LEFT (left_node) = NULL;
2890 SLP_TREE_RIGHT (left_node) = NULL;
2891 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2892 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
2893 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2894 slp_impossible, inside_cost, outside_cost,
2898 SLP_TREE_LEFT (*node) = left_node;
2901 if (first_stmt_dt1 == vect_loop_def)
2903 slp_tree right_node = XNEW (struct _slp_tree);
2904 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2905 SLP_TREE_VEC_STMTS (right_node) = NULL;
2906 SLP_TREE_LEFT (right_node) = NULL;
2907 SLP_TREE_RIGHT (right_node) = NULL;
2908 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2909 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
2910 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2911 slp_impossible, inside_cost, outside_cost,
2915 SLP_TREE_RIGHT (*node) = right_node;
2923 vect_print_slp_tree (slp_tree node)
2931 fprintf (vect_dump, "node ");
2932 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2934 fprintf (vect_dump, "\n\tstmt %d ", i);
2935 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2937 fprintf (vect_dump, "\n");
2939 vect_print_slp_tree (SLP_TREE_LEFT (node));
2940 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2944 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2945 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2946 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2947 stmts in NODE are to be marked. */
2950 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
2958 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2959 if (j < 0 || i == j)
2960 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2962 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2963 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2967 /* Analyze an SLP instance starting from a group of strided stores. Call
2968 vect_build_slp_tree to build a tree of packed stmts if possible.
2969 Return FALSE if it's impossible to SLP any stmt in the loop. */
2972 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2974 slp_instance new_instance;
2975 slp_tree node = XNEW (struct _slp_tree);
2976 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2977 unsigned int unrolling_factor = 1, nunits;
2978 tree vectype, scalar_type, next;
2979 unsigned int vectorization_factor = 0, ncopies;
2980 bool slp_impossible = false;
2981 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2983 /* FORNOW: multiple types are not supported. */
2984 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2985 vectype = get_vectype_for_scalar_type (scalar_type);
2986 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2987 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2988 ncopies = vectorization_factor / nunits;
2991 if (vect_print_dump_info (REPORT_SLP))
2992 fprintf (vect_dump, "SLP failed - multiple types ");
2997 /* Create a node (a root of the SLP tree) for the packed strided stores. */
2998 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3000 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
3003 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3004 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3007 SLP_TREE_VEC_STMTS (node) = NULL;
3008 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3009 SLP_TREE_LEFT (node) = NULL;
3010 SLP_TREE_RIGHT (node) = NULL;
3011 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3012 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3014 /* Calculate the unrolling factor. */
3015 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3017 /* Calculate the number of vector stmts to create based on the unrolling
3018 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3019 GROUP_SIZE / NUNITS otherwise. */
3020 ncopies_for_cost = unrolling_factor * group_size / nunits;
3022 /* Build the tree for the SLP instance. */
3023 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3024 &inside_cost, &outside_cost, ncopies_for_cost))
3026 /* Create a new SLP instance. */
3027 new_instance = XNEW (struct _slp_instance);
3028 SLP_INSTANCE_TREE (new_instance) = node;
3029 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3030 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3031 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3032 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3033 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3035 if (vect_print_dump_info (REPORT_SLP))
3036 vect_print_slp_tree (node);
3041 /* Failed to SLP. */
3042 /* Free the allocated memory. */
3043 vect_free_slp_tree (node);
3048 /* SLP failed for this instance, but it is still possible to SLP other stmts
3054 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
3055 trees of packed scalar stmts if SLP is possible. */
3058 vect_analyze_slp (loop_vec_info loop_vinfo)
3061 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3064 if (vect_print_dump_info (REPORT_SLP))
3065 fprintf (vect_dump, "=== vect_analyze_slp ===");
3067 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3068 if (!vect_analyze_slp_instance (loop_vinfo, store))
3070 /* SLP failed. No instance can be SLPed in the loop. */
3071 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3072 fprintf (vect_dump, "SLP failed.");
3081 /* For each possible SLP instance decide whether to SLP it and calculate overall
3082 unrolling factor needed to SLP the loop. */
3085 vect_make_slp_decision (loop_vec_info loop_vinfo)
3087 unsigned int i, unrolling_factor = 1;
3088 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3089 slp_instance instance;
3090 int decided_to_slp = 0;
3092 if (vect_print_dump_info (REPORT_SLP))
3093 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3095 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3097 /* FORNOW: SLP if you can. */
3098 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3099 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3101 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3102 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3103 loop-based vectorization. Such stmts will be marked as HYBRID. */
3104 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3108 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3110 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3111 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3112 decided_to_slp, unrolling_factor);
3116 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3117 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3120 vect_detect_hybrid_slp_stmts (slp_tree node)
3124 imm_use_iterator imm_iter;
3130 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3131 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3132 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3133 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3134 if (vinfo_for_stmt (use_stmt)
3135 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3136 vect_mark_slp_stmts (node, hybrid, i);
3138 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3139 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3143 /* Find stmts that must be both vectorized and SLPed. */
3146 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3149 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3150 slp_instance instance;
3152 if (vect_print_dump_info (REPORT_SLP))
3153 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
3155 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3156 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3160 /* Function vect_analyze_data_refs.
3162 Find all the data references in the loop.
3164 The general structure of the analysis of data refs in the vectorizer is as
3166 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3167 find and analyze all data-refs in the loop and their dependences.
3168 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3169 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3170 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3175 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3177 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3179 VEC (data_reference_p, heap) *datarefs;
3180 struct data_reference *dr;
3183 if (vect_print_dump_info (REPORT_DETAILS))
3184 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3186 compute_data_dependences_for_loop (loop, true,
3187 &LOOP_VINFO_DATAREFS (loop_vinfo),
3188 &LOOP_VINFO_DDRS (loop_vinfo));
3190 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3191 from stmt_vec_info struct to DR and vectype. */
3192 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3194 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3197 stmt_vec_info stmt_info;
3199 tree base, offset, init;
3201 if (!dr || !DR_REF (dr))
3203 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3204 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3208 stmt = DR_STMT (dr);
3209 stmt_info = vinfo_for_stmt (stmt);
3211 /* Check that analysis of the data-ref succeeded. */
3212 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3215 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3217 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3218 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3223 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3225 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3226 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3231 if (!DR_SYMBOL_TAG (dr))
3233 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3235 fprintf (vect_dump, "not vectorized: no memory tag for ");
3236 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3241 base = unshare_expr (DR_BASE_ADDRESS (dr));
3242 offset = unshare_expr (DR_OFFSET (dr));
3243 init = unshare_expr (DR_INIT (dr));
3245 /* Update DR field in stmt_vec_info struct. */
3246 bb = bb_for_stmt (stmt);
3248 /* If the dataref is in an inner-loop of the loop that is considered for
3249 for vectorization, we also want to analyze the access relative to
3250 the outer-loop (DR contains information only relative to the
3251 inner-most enclosing loop). We do that by building a reference to the
3252 first location accessed by the inner-loop, and analyze it relative to
3254 if (nested_in_vect_loop_p (loop, stmt))
3256 tree outer_step, outer_base, outer_init;
3257 HOST_WIDE_INT pbitsize, pbitpos;
3259 enum machine_mode pmode;
3260 int punsignedp, pvolatilep;
3261 affine_iv base_iv, offset_iv;
3264 /* Build a reference to the first location accessed by the
3265 inner-loop: *(BASE+INIT). (The first location is actually
3266 BASE+INIT+OFFSET, but we add OFFSET separately later. */
3267 tree inner_base = build_fold_indirect_ref
3268 (fold_build2 (PLUS_EXPR,
3269 TREE_TYPE (base), base, init));
3271 if (vect_print_dump_info (REPORT_DETAILS))
3273 fprintf (dump_file, "analyze in outer-loop: ");
3274 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3277 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3278 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3279 gcc_assert (outer_base != NULL_TREE);
3281 if (pbitpos % BITS_PER_UNIT != 0)
3283 if (vect_print_dump_info (REPORT_DETAILS))
3284 fprintf (dump_file, "failed: bit offset alignment.\n");
3288 outer_base = build_fold_addr_expr (outer_base);
3289 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3291 if (vect_print_dump_info (REPORT_DETAILS))
3292 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3299 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3306 offset_iv.base = ssize_int (0);
3307 offset_iv.step = ssize_int (0);
3309 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3311 if (vect_print_dump_info (REPORT_DETAILS))
3312 fprintf (dump_file, "evolution of offset is not affine.\n");
3316 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3317 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3318 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3319 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3320 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3322 outer_step = size_binop (PLUS_EXPR,
3323 fold_convert (ssizetype, base_iv.step),
3324 fold_convert (ssizetype, offset_iv.step));
3326 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3327 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3328 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3329 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3330 STMT_VINFO_DR_OFFSET (stmt_info) =
3331 fold_convert (ssizetype, offset_iv.base);
3332 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3333 size_int (highest_pow2_factor (offset_iv.base));
3335 if (dump_file && (dump_flags & TDF_DETAILS))
3337 fprintf (dump_file, "\touter base_address: ");
3338 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3339 fprintf (dump_file, "\n\touter offset from base address: ");
3340 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3341 fprintf (dump_file, "\n\touter constant offset from base address: ");
3342 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3343 fprintf (dump_file, "\n\touter step: ");
3344 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3345 fprintf (dump_file, "\n\touter aligned to: ");
3346 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3350 if (STMT_VINFO_DATA_REF (stmt_info))
3352 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3355 "not vectorized: more than one data ref in stmt: ");
3356 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3360 STMT_VINFO_DATA_REF (stmt_info) = dr;
3362 /* Set vectype for STMT. */
3363 scalar_type = TREE_TYPE (DR_REF (dr));
3364 STMT_VINFO_VECTYPE (stmt_info) =
3365 get_vectype_for_scalar_type (scalar_type);
3366 if (!STMT_VINFO_VECTYPE (stmt_info))
3368 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3371 "not vectorized: no vectype for stmt: ");
3372 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3373 fprintf (vect_dump, " scalar_type: ");
3374 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3384 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3386 /* Function vect_mark_relevant.
3388 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3391 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3392 enum vect_relevant relevant, bool live_p)
3394 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3395 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3396 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3398 if (vect_print_dump_info (REPORT_DETAILS))
3399 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
3401 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3405 /* This is the last stmt in a sequence that was detected as a
3406 pattern that can potentially be vectorized. Don't mark the stmt
3407 as relevant/live because it's not going to be vectorized.
3408 Instead mark the pattern-stmt that replaces it. */
3410 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3412 if (vect_print_dump_info (REPORT_DETAILS))
3413 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3414 stmt_info = vinfo_for_stmt (pattern_stmt);
3415 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3416 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3417 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3418 stmt = pattern_stmt;
3421 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3422 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3423 STMT_VINFO_RELEVANT (stmt_info) = relevant;
3425 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3426 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3428 if (vect_print_dump_info (REPORT_DETAILS))
3429 fprintf (vect_dump, "already marked relevant/live.");
3433 VEC_safe_push (tree, heap, *worklist, stmt);
3437 /* Function vect_stmt_relevant_p.
3439 Return true if STMT in loop that is represented by LOOP_VINFO is
3440 "relevant for vectorization".
3442 A stmt is considered "relevant for vectorization" if:
3443 - it has uses outside the loop.
3444 - it has vdefs (it alters memory).
3445 - control stmts in the loop (except for the exit condition).
3447 CHECKME: what other side effects would the vectorizer allow? */
3450 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3451 enum vect_relevant *relevant, bool *live_p)
3453 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3454 ssa_op_iter op_iter;
3455 imm_use_iterator imm_iter;
3456 use_operand_p use_p;
3457 def_operand_p def_p;
3459 *relevant = vect_unused_in_loop;
3462 /* cond stmt other than loop exit cond. */
3463 if (is_ctrl_stmt (stmt)
3464 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3465 *relevant = vect_used_in_loop;
3467 /* changing memory. */
3468 if (TREE_CODE (stmt) != PHI_NODE)
3469 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3471 if (vect_print_dump_info (REPORT_DETAILS))
3472 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3473 *relevant = vect_used_in_loop;
3476 /* uses outside the loop. */
3477 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3479 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3481 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3482 if (!flow_bb_inside_loop_p (loop, bb))
3484 if (vect_print_dump_info (REPORT_DETAILS))
3485 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3487 /* We expect all such uses to be in the loop exit phis
3488 (because of loop closed form) */
3489 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3490 gcc_assert (bb == single_exit (loop)->dest);
3497 return (*live_p || *relevant);
3502 Function process_use.
3505 - a USE in STMT in a loop represented by LOOP_VINFO
3506 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3507 that defined USE. This is dont by calling mark_relevant and passing it
3508 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
3511 Generally, LIVE_P and RELEVANT are used to define the liveness and
3512 relevance info of the DEF_STMT of this USE:
3513 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3514 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3516 - case 1: If USE is used only for address computations (e.g. array indexing),
3517 which does not need to be directly vectorized, then the liveness/relevance
3518 of the respective DEF_STMT is left unchanged.
3519 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3520 skip DEF_STMT cause it had already been processed.
3521 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3522 be modified accordingly.
3524 Return true if everything is as expected. Return false otherwise. */
3527 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3528 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3530 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3531 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3532 stmt_vec_info dstmt_vinfo;
3533 basic_block bb, def_bb;
3535 enum vect_def_type dt;
3537 /* case 1: we are only interested in uses that need to be vectorized. Uses
3538 that are used for address computation are not considered relevant. */
3539 if (!exist_non_indexing_operands_for_use_p (use, stmt))
3542 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3544 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3545 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
3549 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3552 def_bb = bb_for_stmt (def_stmt);
3553 if (!flow_bb_inside_loop_p (loop, def_bb))
3555 if (vect_print_dump_info (REPORT_DETAILS))
3556 fprintf (vect_dump, "def_stmt is out of loop.");
3560 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3561 DEF_STMT must have already been processed, because this should be the
3562 only way that STMT, which is a reduction-phi, was put in the worklist,
3563 as there should be no other uses for DEF_STMT in the loop. So we just
3564 check that everything is as expected, and we are done. */
3565 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3566 bb = bb_for_stmt (stmt);
3567 if (TREE_CODE (stmt) == PHI_NODE
3568 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3569 && TREE_CODE (def_stmt) != PHI_NODE
3570 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3571 && bb->loop_father == def_bb->loop_father)
3573 if (vect_print_dump_info (REPORT_DETAILS))
3574 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
3575 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3576 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3577 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3578 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3579 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3583 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3584 outer-loop-header-bb:
3590 if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3592 if (vect_print_dump_info (REPORT_DETAILS))
3593 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3596 case vect_unused_in_loop:
3597 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3598 vect_used_by_reduction : vect_unused_in_loop;
3600 case vect_used_in_outer_by_reduction:
3601 relevant = vect_used_by_reduction;
3603 case vect_used_in_outer:
3604 relevant = vect_used_in_loop;
3606 case vect_used_by_reduction:
3607 case vect_used_in_loop:
3615 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3616 outer-loop-header-bb:
3622 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3624 if (vect_print_dump_info (REPORT_DETAILS))
3625 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3628 case vect_unused_in_loop:
3629 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3630 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3633 case vect_used_in_outer_by_reduction:
3634 case vect_used_in_outer:
3637 case vect_used_by_reduction:
3638 relevant = vect_used_in_outer_by_reduction;
3641 case vect_used_in_loop:
3642 relevant = vect_used_in_outer;
3650 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3655 /* Function vect_mark_stmts_to_be_vectorized.
3657 Not all stmts in the loop need to be vectorized. For example:
3666 Stmt 1 and 3 do not need to be vectorized, because loop control and
3667 addressing of vectorized data-refs are handled differently.
3669 This pass detects such stmts. */
3672 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3674 VEC(tree,heap) *worklist;
3675 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3676 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3677 unsigned int nbbs = loop->num_nodes;
3678 block_stmt_iterator si;
3682 stmt_vec_info stmt_vinfo;
3686 enum vect_relevant relevant;
3688 if (vect_print_dump_info (REPORT_DETAILS))
3689 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3691 worklist = VEC_alloc (tree, heap, 64);
3693 /* 1. Init worklist. */
3694 for (i = 0; i < nbbs; i++)
3697 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3699 if (vect_print_dump_info (REPORT_DETAILS))
3701 fprintf (vect_dump, "init: phi relevant? ");
3702 print_generic_expr (vect_dump, phi, TDF_SLIM);
3705 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3706 vect_mark_relevant (&worklist, phi, relevant, live_p);
3708 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3710 stmt = bsi_stmt (si);
3711 if (vect_print_dump_info (REPORT_DETAILS))
3713 fprintf (vect_dump, "init: stmt relevant? ");
3714 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3717 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3718 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3722 /* 2. Process_worklist */
3723 while (VEC_length (tree, worklist) > 0)
3725 use_operand_p use_p;
3728 stmt = VEC_pop (tree, worklist);
3729 if (vect_print_dump_info (REPORT_DETAILS))
3731 fprintf (vect_dump, "worklist: examine stmt: ");
3732 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3735 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3736 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3737 liveness and relevance properties of STMT. */
3738 ann = stmt_ann (stmt);
3739 stmt_vinfo = vinfo_for_stmt (stmt);
3740 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3741 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3743 /* Generally, the liveness and relevance properties of STMT are
3744 propagated as is to the DEF_STMTs of its USEs:
3745 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3746 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3748 One exception is when STMT has been identified as defining a reduction
3749 variable; in this case we set the liveness/relevance as follows:
3751 relevant = vect_used_by_reduction
3752 This is because we distinguish between two kinds of relevant stmts -
3753 those that are used by a reduction computation, and those that are
3754 (also) used by a regular computation. This allows us later on to
3755 identify stmts that are used solely by a reduction, and therefore the
3756 order of the results that they produce does not have to be kept.
3758 Reduction phis are expected to be used by a reduction stmt, or by
3759 in an outer loop; Other reduction stmts are expected to be
3760 in the loop, and possibly used by a stmt in an outer loop.
3761 Here are the expected values of "relevant" for reduction phis/stmts:
3764 vect_unused_in_loop ok
3765 vect_used_in_outer_by_reduction ok ok
3766 vect_used_in_outer ok ok
3767 vect_used_by_reduction ok
3768 vect_used_in_loop */
3770 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3772 enum vect_relevant tmp_relevant = relevant;
3773 switch (tmp_relevant)
3775 case vect_unused_in_loop:
3776 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3777 relevant = vect_used_by_reduction;
3780 case vect_used_in_outer_by_reduction:
3781 case vect_used_in_outer:
3782 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3783 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3786 case vect_used_by_reduction:
3787 if (TREE_CODE (stmt) == PHI_NODE)
3790 case vect_used_in_loop:
3792 if (vect_print_dump_info (REPORT_DETAILS))
3793 fprintf (vect_dump, "unsupported use of reduction.");
3794 VEC_free (tree, heap, worklist);
3800 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3802 tree op = USE_FROM_PTR (use_p);
3803 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3805 VEC_free (tree, heap, worklist);
3809 } /* while worklist */
3811 VEC_free (tree, heap, worklist);
3816 /* Function vect_can_advance_ivs_p
3818 In case the number of iterations that LOOP iterates is unknown at compile
3819 time, an epilog loop will be generated, and the loop induction variables
3820 (IVs) will be "advanced" to the value they are supposed to take just before
3821 the epilog loop. Here we check that the access function of the loop IVs
3822 and the expression that represents the loop bound are simple enough.
3823 These restrictions will be relaxed in the future. */
3826 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3828 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3829 basic_block bb = loop->header;
3832 /* Analyze phi functions of the loop header. */
3834 if (vect_print_dump_info (REPORT_DETAILS))
3835 fprintf (vect_dump, "vect_can_advance_ivs_p:");
3837 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3839 tree access_fn = NULL;
3840 tree evolution_part;
3842 if (vect_print_dump_info (REPORT_DETAILS))
3844 fprintf (vect_dump, "Analyze phi: ");
3845 print_generic_expr (vect_dump, phi, TDF_SLIM);
3848 /* Skip virtual phi's. The data dependences that are associated with
3849 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3851 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3853 if (vect_print_dump_info (REPORT_DETAILS))
3854 fprintf (vect_dump, "virtual phi. skip.");
3858 /* Skip reduction phis. */
3860 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3862 if (vect_print_dump_info (REPORT_DETAILS))
3863 fprintf (vect_dump, "reduc phi. skip.");
3867 /* Analyze the evolution function. */
3869 access_fn = instantiate_parameters
3870 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
3874 if (vect_print_dump_info (REPORT_DETAILS))
3875 fprintf (vect_dump, "No Access function.");
3879 if (vect_print_dump_info (REPORT_DETAILS))
3881 fprintf (vect_dump, "Access function of PHI: ");
3882 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
3885 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3887 if (evolution_part == NULL_TREE)
3889 if (vect_print_dump_info (REPORT_DETAILS))
3890 fprintf (vect_dump, "No evolution.");
3894 /* FORNOW: We do not transform initial conditions of IVs
3895 which evolution functions are a polynomial of degree >= 2. */
3897 if (tree_is_chrec (evolution_part))
3905 /* Function vect_get_loop_niters.
3907 Determine how many iterations the loop is executed.
3908 If an expression that represents the number of iterations
3909 can be constructed, place it in NUMBER_OF_ITERATIONS.
3910 Return the loop exit condition. */
3913 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3917 if (vect_print_dump_info (REPORT_DETAILS))
3918 fprintf (vect_dump, "=== get_loop_niters ===");
3920 niters = number_of_exit_cond_executions (loop);
3922 if (niters != NULL_TREE
3923 && niters != chrec_dont_know)
3925 *number_of_iterations = niters;
3927 if (vect_print_dump_info (REPORT_DETAILS))
3929 fprintf (vect_dump, "==> get_loop_niters:" );
3930 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
3934 return get_loop_exit_condition (loop);
3938 /* Function vect_analyze_loop_1.
3940 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3941 for it. The different analyses will record information in the
3942 loop_vec_info struct. This is a subset of the analyses applied in
3943 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3944 that is now considered for (outer-loop) vectorization. */
3946 static loop_vec_info
3947 vect_analyze_loop_1 (struct loop *loop)
3949 loop_vec_info loop_vinfo;
3951 if (vect_print_dump_info (REPORT_DETAILS))
3952 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3954 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3956 loop_vinfo = vect_analyze_loop_form (loop);
3959 if (vect_print_dump_info (REPORT_DETAILS))
3960 fprintf (vect_dump, "bad inner-loop form.");
3968 /* Function vect_analyze_loop_form.
3970 Verify that certain CFG restrictions hold, including:
3971 - the loop has a pre-header
3972 - the loop has a single entry and exit
3973 - the loop exit condition is simple enough, and the number of iterations
3974 can be analyzed (a countable loop). */
3976 static loop_vec_info
3977 vect_analyze_loop_form (struct loop *loop)
3979 loop_vec_info loop_vinfo;
3981 tree number_of_iterations = NULL;
3982 loop_vec_info inner_loop_vinfo = NULL;
3984 if (vect_print_dump_info (REPORT_DETAILS))
3985 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
3987 /* Different restrictions apply when we are considering an inner-most loop,
3988 vs. an outer (nested) loop.
3989 (FORNOW. May want to relax some of these restrictions in the future). */
3993 /* Inner-most loop. We currently require that the number of BBs is
3994 exactly 2 (the header and latch). Vectorizable inner-most loops
4005 if (loop->num_nodes != 2)
4007 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4008 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4012 if (empty_block_p (loop->header))
4014 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4015 fprintf (vect_dump, "not vectorized: empty loop.");
4021 struct loop *innerloop = loop->inner;
4022 edge backedge, entryedge;
4024 /* Nested loop. We currently require that the loop is doubly-nested,
4025 contains a single inner loop, and the number of BBs is exactly 5.
4026 Vectorizable outer-loops look like this:
4038 The inner-loop has the properties expected of inner-most loops
4039 as described above. */
4041 if ((loop->inner)->inner || (loop->inner)->next)
4043 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4044 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4048 /* Analyze the inner-loop. */
4049 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4050 if (!inner_loop_vinfo)
4052 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4053 fprintf (vect_dump, "not vectorized: Bad inner loop.");
4057 if (!expr_invariant_in_loop_p (loop,
4058 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4060 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4062 "not vectorized: inner-loop count not invariant.");
4063 destroy_loop_vec_info (inner_loop_vinfo, true);
4067 if (loop->num_nodes != 5)
4069 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4070 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4071 destroy_loop_vec_info (inner_loop_vinfo, true);
4075 gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4076 backedge = EDGE_PRED (innerloop->header, 1);
4077 entryedge = EDGE_PRED (innerloop->header, 0);
4078 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4080 backedge = EDGE_PRED (innerloop->header, 0);
4081 entryedge = EDGE_PRED (innerloop->header, 1);
4084 if (entryedge->src != loop->header
4085 || !single_exit (innerloop)
4086 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4088 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4089 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4090 destroy_loop_vec_info (inner_loop_vinfo, true);
4094 if (vect_print_dump_info (REPORT_DETAILS))
4095 fprintf (vect_dump, "Considering outer-loop vectorization.");
4098 if (!single_exit (loop)
4099 || EDGE_COUNT (loop->header->preds) != 2)
4101 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4103 if (!single_exit (loop))
4104 fprintf (vect_dump, "not vectorized: multiple exits.");
4105 else if (EDGE_COUNT (loop->header->preds) != 2)
4106 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4108 if (inner_loop_vinfo)
4109 destroy_loop_vec_info (inner_loop_vinfo, true);
4113 /* We assume that the loop exit condition is at the end of the loop. i.e,
4114 that the loop is represented as a do-while (with a proper if-guard
4115 before the loop if needed), where the loop header contains all the
4116 executable statements, and the latch is empty. */
4117 if (!empty_block_p (loop->latch)
4118 || phi_nodes (loop->latch))
4120 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4121 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4122 if (inner_loop_vinfo)
4123 destroy_loop_vec_info (inner_loop_vinfo, true);
4127 /* Make sure there exists a single-predecessor exit bb: */
4128 if (!single_pred_p (single_exit (loop)->dest))
4130 edge e = single_exit (loop);
4131 if (!(e->flags & EDGE_ABNORMAL))
4133 split_loop_exit_edge (e);
4134 if (vect_print_dump_info (REPORT_DETAILS))
4135 fprintf (vect_dump, "split exit edge.");
4139 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4140 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4141 if (inner_loop_vinfo)
4142 destroy_loop_vec_info (inner_loop_vinfo, true);
4147 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4150 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4151 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4152 if (inner_loop_vinfo)
4153 destroy_loop_vec_info (inner_loop_vinfo, true);
4157 if (!number_of_iterations)
4159 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4161 "not vectorized: number of iterations cannot be computed.");
4162 if (inner_loop_vinfo)
4163 destroy_loop_vec_info (inner_loop_vinfo, true);
4167 if (chrec_contains_undetermined (number_of_iterations))
4169 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4170 fprintf (vect_dump, "Infinite number of iterations.");
4171 if (inner_loop_vinfo)
4172 destroy_loop_vec_info (inner_loop_vinfo, true);
4176 if (!NITERS_KNOWN_P (number_of_iterations))
4178 if (vect_print_dump_info (REPORT_DETAILS))
4180 fprintf (vect_dump, "Symbolic number of iterations is ");
4181 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4184 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4186 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4187 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4188 if (inner_loop_vinfo)
4189 destroy_loop_vec_info (inner_loop_vinfo, false);
4193 loop_vinfo = new_loop_vec_info (loop);
4194 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4196 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4198 /* CHECKME: May want to keep it around it in the future. */
4199 if (inner_loop_vinfo)
4200 destroy_loop_vec_info (inner_loop_vinfo, false);
4202 gcc_assert (!loop->aux);
4203 loop->aux = loop_vinfo;
4208 /* Function vect_analyze_loop.
4210 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4211 for it. The different analyses will record information in the
4212 loop_vec_info struct. */
4214 vect_analyze_loop (struct loop *loop)
4217 loop_vec_info loop_vinfo;
4219 if (vect_print_dump_info (REPORT_DETAILS))
4220 fprintf (vect_dump, "===== analyze_loop_nest =====");
4222 if (loop_outer (loop)
4223 && loop_vec_info_for_loop (loop_outer (loop))
4224 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4226 if (vect_print_dump_info (REPORT_DETAILS))
4227 fprintf (vect_dump, "outer-loop already vectorized.");
4231 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4233 loop_vinfo = vect_analyze_loop_form (loop);
4236 if (vect_print_dump_info (REPORT_DETAILS))
4237 fprintf (vect_dump, "bad loop form.");
4241 /* Find all data references in the loop (which correspond to vdefs/vuses)
4242 and analyze their evolution in the loop.
4244 FORNOW: Handle only simple, array references, which
4245 alignment can be forced, and aligned pointer-references. */
4247 ok = vect_analyze_data_refs (loop_vinfo);
4250 if (vect_print_dump_info (REPORT_DETAILS))
4251 fprintf (vect_dump, "bad data references.");
4252 destroy_loop_vec_info (loop_vinfo, true);
4256 /* Classify all cross-iteration scalar data-flow cycles.
4257 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4259 vect_analyze_scalar_cycles (loop_vinfo);
4261 vect_pattern_recog (loop_vinfo);
4263 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4265 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4268 if (vect_print_dump_info (REPORT_DETAILS))
4269 fprintf (vect_dump, "unexpected pattern.");
4270 destroy_loop_vec_info (loop_vinfo, true);
4274 /* Analyze the alignment of the data-refs in the loop.
4275 Fail if a data reference is found that cannot be vectorized. */
4277 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4280 if (vect_print_dump_info (REPORT_DETAILS))
4281 fprintf (vect_dump, "bad data alignment.");
4282 destroy_loop_vec_info (loop_vinfo, true);
4286 ok = vect_determine_vectorization_factor (loop_vinfo);
4289 if (vect_print_dump_info (REPORT_DETAILS))
4290 fprintf (vect_dump, "can't determine vectorization factor.");
4291 destroy_loop_vec_info (loop_vinfo, true);
4295 /* Analyze data dependences between the data-refs in the loop.
4296 FORNOW: fail at the first data dependence that we encounter. */
4298 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4301 if (vect_print_dump_info (REPORT_DETAILS))
4302 fprintf (vect_dump, "bad data dependence.");
4303 destroy_loop_vec_info (loop_vinfo, true);
4307 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4308 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4310 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4313 if (vect_print_dump_info (REPORT_DETAILS))
4314 fprintf (vect_dump, "bad data access.");
4315 destroy_loop_vec_info (loop_vinfo, true);
4319 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4320 It is important to call pruning after vect_analyze_data_ref_accesses,
4321 since we use grouping information gathered by interleaving analysis. */
4322 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "too long list of versioning for alias "
4328 destroy_loop_vec_info (loop_vinfo, true);
4332 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4333 ok = vect_analyze_slp (loop_vinfo);
4336 /* Decide which possible SLP instances to SLP. */
4337 vect_make_slp_decision (loop_vinfo);
4339 /* Find stmts that need to be both vectorized and SLPed. */
4340 vect_detect_hybrid_slp (loop_vinfo);
4343 /* This pass will decide on using loop versioning and/or loop peeling in
4344 order to enhance the alignment of data references in the loop. */
4346 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4349 if (vect_print_dump_info (REPORT_DETAILS))
4350 fprintf (vect_dump, "bad data alignment.");
4351 destroy_loop_vec_info (loop_vinfo, true);
4355 /* Scan all the operations in the loop and make sure they are
4358 ok = vect_analyze_operations (loop_vinfo);
4361 if (vect_print_dump_info (REPORT_DETAILS))
4362 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4363 destroy_loop_vec_info (loop_vinfo, true);
4367 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;