1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
44 static bool vect_can_advance_ivs_p (loop_vec_info);
46 /* Function vect_determine_vectorization_factor
48 Determine the vectorization factor (VF). VF is the number of data elements
49 that are operated upon in parallel in a single iteration of the vectorized
50 loop. For example, when vectorizing a loop that operates on 4byte elements,
51 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
52 elements can fit in a single vector register.
54 We currently support vectorization of loops in which all types operated upon
55 are of the same size. Therefore this function currently sets VF according to
56 the size of the types operated upon, and fails if there are multiple sizes
59 VF is also the factor by which the loop iterations are strip-mined, e.g.:
66 for (i=0; i<N; i+=VF){
67 a[i:VF] = b[i:VF] + c[i:VF];
/* NOTE(review): this excerpt is non-contiguous (the embedded line numbers
   skip), so the return type, several declarations, braces and the failure
   returns of this function are not visible here.  Comments below describe
   only what the visible lines establish.  */
72 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
74 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
75 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
76 int nbbs = loop->num_nodes;
77 block_stmt_iterator si;
78 unsigned int vectorization_factor = 0;
83 stmt_vec_info stmt_info;
86 if (vect_print_dump_info (REPORT_DETAILS))
87 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
/* Walk every basic block of the loop: first the PHI nodes of each block,
   then its statements.  The running maximum of the per-stmt vector unit
   counts becomes the VF.  */
89 for (i = 0; i < nbbs; i++)
91 basic_block bb = bbs[i];
93 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
95 stmt_info = vinfo_for_stmt (phi);
96 if (vect_print_dump_info (REPORT_DETAILS))
98 fprintf (vect_dump, "==> examining phi: ");
99 print_generic_expr (vect_dump, phi, TDF_SLIM);
102 gcc_assert (stmt_info);
/* Only relevant PHIs contribute a vector type; their vectype must not
   have been set yet.  */
104 if (STMT_VINFO_RELEVANT_P (stmt_info))
106 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
107 scalar_type = TREE_TYPE (PHI_RESULT (phi));
109 if (vect_print_dump_info (REPORT_DETAILS))
111 fprintf (vect_dump, "get vectype for scalar type: ");
112 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
/* Map the scalar type to a target vector type; failure (handling not
   fully visible in this excerpt) means the data-type is unsupported.  */
115 vectype = get_vectype_for_scalar_type (scalar_type);
118 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
121 "not vectorized: unsupported data-type ");
122 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
126 STMT_VINFO_VECTYPE (stmt_info) = vectype;
128 if (vect_print_dump_info (REPORT_DETAILS))
130 fprintf (vect_dump, "vectype: ");
131 print_generic_expr (vect_dump, vectype, TDF_SLIM);
/* The number of scalar elements per vector; VF is the maximum over all
   relevant stmts.  */
134 nunits = TYPE_VECTOR_SUBPARTS (vectype);
135 if (vect_print_dump_info (REPORT_DETAILS))
136 fprintf (vect_dump, "nunits = %d", nunits);
138 if (!vectorization_factor
139 || (nunits > vectorization_factor))
140 vectorization_factor = nunits;
/* Second pass over this block: the non-PHI statements.  */
144 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
146 tree stmt = bsi_stmt (si);
147 stmt_info = vinfo_for_stmt (stmt);
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "==> examining statement: ");
152 print_generic_expr (vect_dump, stmt, TDF_SLIM);
155 gcc_assert (stmt_info);
157 /* skip stmts which do not need to be vectorized. */
158 if (!STMT_VINFO_RELEVANT_P (stmt_info)
159 && !STMT_VINFO_LIVE_P (stmt_info))
161 if (vect_print_dump_info (REPORT_DETAILS))
162 fprintf (vect_dump, "skip.");
/* Only GIMPLE_MODIFY_STMTs are handled; anything else is "irregular"
   and blocks vectorization.  */
166 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
168 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
170 fprintf (vect_dump, "not vectorized: irregular stmt.");
171 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* A stmt that already computes a vector value means the loop was
   (partially) vectorized before - not supported.  */
176 if (!GIMPLE_STMT_P (stmt)
177 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
179 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
181 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
182 print_generic_expr (vect_dump, stmt, TDF_SLIM);
187 if (STMT_VINFO_VECTYPE (stmt_info))
189 /* The only case when a vectype had been already set is for stmts
190 that contain a dataref, or for "pattern-stmts" (stmts generated
191 by the vectorizer to represent/replace a certain idiom). */
192 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
193 || is_pattern_stmt_p (stmt_info));
194 vectype = STMT_VINFO_VECTYPE (stmt_info);
200 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
201 && !is_pattern_stmt_p (stmt_info));
203 /* We generally set the vectype according to the type of the
205 For stmts whose result-type is different than the type of the
206 arguments (e.g. demotion, promotion), vectype will be reset
207 appropriately (later). Note that we have to visit the smallest
208 datatype in this function, because that determines the VF.
209 If the smallest datatype in the loop is present only as the
210 rhs of a promotion operation - we'd miss it here.
211 Such a case, where a variable of this datatype does not appear
212 in the lhs anywhere in the loop, can only occur if it's an
213 invariant: e.g.: 'int_x = (int) short_inv', which we'd expect
214 to have been optimized away by invariant motion. However, we
215 cannot rely on invariant motion to always take invariants out
216 of the loop, and so in the case of promotion we also have to
218 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
/* For widening/conversion operations, prefer the (smaller) rhs type
   so that the smallest datatype in the loop determines the VF.  */
220 operation = GIMPLE_STMT_OPERAND (stmt, 1);
221 if (TREE_CODE (operation) == NOP_EXPR
222 || TREE_CODE (operation) == CONVERT_EXPR
223 || TREE_CODE (operation) == WIDEN_MULT_EXPR
224 || TREE_CODE (operation) == FLOAT_EXPR)
226 tree rhs_type = TREE_TYPE (TREE_OPERAND (operation, 0));
227 if (TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type)) <
228 TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type)))
229 scalar_type = rhs_type;
232 if (vect_print_dump_info (REPORT_DETAILS))
234 fprintf (vect_dump, "get vectype for scalar type: ");
235 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
238 vectype = get_vectype_for_scalar_type (scalar_type);
241 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
244 "not vectorized: unsupported data-type ");
245 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
249 STMT_VINFO_VECTYPE (stmt_info) = vectype;
252 if (vect_print_dump_info (REPORT_DETAILS))
254 fprintf (vect_dump, "vectype: ");
255 print_generic_expr (vect_dump, vectype, TDF_SLIM);
258 nunits = TYPE_VECTOR_SUBPARTS (vectype);
259 if (vect_print_dump_info (REPORT_DETAILS))
260 fprintf (vect_dump, "nunits = %d", nunits);
262 if (!vectorization_factor
263 || (nunits > vectorization_factor))
264 vectorization_factor = nunits;
269 /* TODO: Analyze cost. Decide if worth while to vectorize. */
270 if (vect_print_dump_info (REPORT_DETAILS))
271 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
/* A VF of 0 or 1 means no stmt produced a usable vector type - give up
   (the failure return itself is elided from this excerpt).  */
272 if (vectorization_factor <= 1)
274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
275 fprintf (vect_dump, "not vectorized: unsupported data-type");
278 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
284 /* SLP costs are calculated according to SLP instance unrolling factor (i.e.,
285 the number of created vector stmts depends on the unrolling factor). However,
286 the actual number of vector stmts for every SLP node depends on VF which is
287 set later in vect_analyze_operations(). Hence, SLP costs should be updated.
288 In this function we assume that the inside costs calculated in
289 vect_model_xxx_cost are linear in ncopies. */
292 vect_update_slp_costs_according_to_vf (loop_vec_info loop_vinfo)
294 unsigned int i, vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
295 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
296 slp_instance instance;
298 if (vect_print_dump_info (REPORT_SLP))
299 fprintf (vect_dump, "=== vect_update_slp_costs_according_to_vf ===");
/* Scale each SLP instance's inside-of-loop cost by vf / unrolling-factor.
   Integer division: the unrolling factor is expected to divide VF, since
   VF is set to an lcm of it in vect_analyze_operations - TODO confirm.  */
301 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
302 /* We assume that costs are linear in ncopies. */
303 SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance) *= vf
304 / SLP_INSTANCE_UNROLLING_FACTOR (instance);
308 /* Function vect_analyze_operations.
310 Scan the loop stmts and make sure they are all vectorizable. */
/* NOTE(review): non-contiguous excerpt - the return type, braces, 'else'
   arms and the early 'return false' paths of this function are not visible.
   Comments below only describe what the visible lines establish.  */
313 vect_analyze_operations (loop_vec_info loop_vinfo)
315 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
316 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
317 int nbbs = loop->num_nodes;
318 block_stmt_iterator si;
319 unsigned int vectorization_factor = 0;
323 stmt_vec_info stmt_info;
324 bool need_to_vectorize = false;
325 int min_profitable_iters;
326 int min_scalar_loop_bound;
328 bool only_slp_in_loop = true;
330 if (vect_print_dump_info (REPORT_DETAILS))
331 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* The VF must already have been computed by
   vect_determine_vectorization_factor.  */
333 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
334 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* Pass 1: check every PHI and every stmt in the loop for vectorizability.  */
336 for (i = 0; i < nbbs; i++)
338 basic_block bb = bbs[i];
340 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
344 stmt_info = vinfo_for_stmt (phi);
345 if (vect_print_dump_info (REPORT_DETAILS))
347 fprintf (vect_dump, "examining phi: ");
348 print_generic_expr (vect_dump, phi, TDF_SLIM);
351 if (! is_loop_header_bb_p (bb))
353 /* inner-loop loop-closed exit phi in outer-loop vectorization
354 (i.e. a phi in the tail of the outer-loop).
355 FORNOW: we currently don't support the case that these phis
356 are not used in the outerloop, cause this case requires
357 to actually do something here. */
358 if (!STMT_VINFO_RELEVANT_P (stmt_info)
359 || STMT_VINFO_LIVE_P (stmt_info))
361 if (vect_print_dump_info (REPORT_DETAILS))
363 "Unsupported loop-closed phi in outer-loop.");
369 gcc_assert (stmt_info);
/* A PHI result used after the loop ("live") is not yet supported.  */
371 if (STMT_VINFO_LIVE_P (stmt_info))
373 /* FORNOW: not yet supported. */
374 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
375 fprintf (vect_dump, "not vectorized: value used after loop.");
/* A relevant non-induction PHI forms a scalar cross-iteration cycle
   we cannot vectorize.  */
379 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
380 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
382 /* A scalar-dependence cycle that we don't support. */
383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
384 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
388 if (STMT_VINFO_RELEVANT_P (stmt_info))
390 need_to_vectorize = true;
391 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
392 ok = vectorizable_induction (phi, NULL, NULL);
397 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
400 "not vectorized: relevant phi not supported: ");
401 print_generic_expr (vect_dump, phi, TDF_SLIM);
/* Now the non-PHI statements of this block.  */
407 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
409 tree stmt = bsi_stmt (si);
410 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
411 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
413 if (vect_print_dump_info (REPORT_DETAILS))
415 fprintf (vect_dump, "==> examining statement: ");
416 print_generic_expr (vect_dump, stmt, TDF_SLIM);
419 gcc_assert (stmt_info);
421 /* skip stmts which do not need to be vectorized.
422 this is expected to include:
423 - the COND_EXPR which is the loop exit condition
424 - any LABEL_EXPRs in the loop
425 - computations that are used only for array indexing or loop
428 if (!STMT_VINFO_RELEVANT_P (stmt_info)
429 && !STMT_VINFO_LIVE_P (stmt_info))
431 if (vect_print_dump_info (REPORT_DETAILS))
432 fprintf (vect_dump, "irrelevant.");
/* Sanity-check the relevance of this stmt against its def type
   (the full case list is partly elided from this excerpt).  */
436 switch (STMT_VINFO_DEF_TYPE (stmt_info))
441 case vect_reduction_def:
442 gcc_assert (relevance == vect_used_in_outer
443 || relevance == vect_used_in_outer_by_reduction
444 || relevance == vect_unused_in_loop);
447 case vect_induction_def:
448 case vect_constant_def:
449 case vect_invariant_def:
450 case vect_unknown_def_type:
455 if (STMT_VINFO_RELEVANT_P (stmt_info))
457 gcc_assert (GIMPLE_STMT_P (stmt)
458 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
459 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
460 need_to_vectorize = true;
/* Try every vectorizable_* analysis in turn; 'ok' is true if any one
   of them accepts the stmt.  */
464 if (STMT_VINFO_RELEVANT_P (stmt_info)
465 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
466 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
467 || vectorizable_type_demotion (stmt, NULL, NULL)
468 || vectorizable_conversion (stmt, NULL, NULL, NULL)
469 || vectorizable_operation (stmt, NULL, NULL, NULL)
470 || vectorizable_assignment (stmt, NULL, NULL, NULL)
471 || vectorizable_load (stmt, NULL, NULL, NULL)
472 || vectorizable_call (stmt, NULL, NULL)
473 || vectorizable_store (stmt, NULL, NULL, NULL)
474 || vectorizable_condition (stmt, NULL, NULL)
475 || vectorizable_reduction (stmt, NULL, NULL));
479 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
481 fprintf (vect_dump, "not vectorized: relevant stmt not ");
482 fprintf (vect_dump, "supported: ");
483 print_generic_expr (vect_dump, stmt, TDF_SLIM);
488 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
489 need extra handling, except for vectorizable reductions. */
490 if (STMT_VINFO_LIVE_P (stmt_info)
491 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
492 ok = vectorizable_live_operation (stmt, NULL, NULL);
496 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
498 fprintf (vect_dump, "not vectorized: live stmt not ");
499 fprintf (vect_dump, "supported: ");
500 print_generic_expr (vect_dump, stmt, TDF_SLIM);
505 if (!PURE_SLP_STMT (stmt_info))
507 /* STMT needs loop-based vectorization. */
508 only_slp_in_loop = false;
510 /* Groups of strided accesses whose size is not a power of 2 are
511 not vectorizable yet using loop-vectorization. Therefore, if
512 this stmt feeds non-SLP-able stmts (i.e., this stmt has to be
513 both SLPed and loop-based vectorzed), the loop cannot be
515 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
516 && exact_log2 (DR_GROUP_SIZE (vinfo_for_stmt (
517 DR_GROUP_FIRST_DR (stmt_info)))) == -1)
519 if (vect_print_dump_info (REPORT_DETAILS))
521 fprintf (vect_dump, "not vectorized: the size of group "
522 "of strided accesses is not a power of 2");
523 print_generic_expr (vect_dump, stmt, TDF_SLIM);
531 /* All operations in the loop are either irrelevant (deal with loop
532 control, or dead), or only used outside the loop and can be moved
533 out of the loop (e.g. invariants, inductions). The loop can be
534 optimized away by scalar optimizations. We're better off not
535 touching this loop. */
536 if (!need_to_vectorize)
538 if (vect_print_dump_info (REPORT_DETAILS))
540 "All the computation can be taken out of the loop.");
541 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
543 "not vectorized: redundant loop. no profit to vectorize.");
547 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
548 vectorization factor of the loop is the unrolling factor required by the
549 SLP instances. If that unrolling factor is 1, we say, that we perform
550 pure SLP on loop - cross iteration parallelism is not exploited. */
551 if (only_slp_in_loop)
552 vectorization_factor = LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo);
/* Otherwise VF must accommodate both loop-based vectorization and the SLP
   unrolling factor, hence the least common multiple.  */
554 vectorization_factor = least_common_multiple (vectorization_factor,
555 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
557 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
559 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
560 && vect_print_dump_info (REPORT_DETAILS))
562 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
563 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* A known trip count below VF cannot fill even one vector iteration.  */
565 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
566 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
568 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
569 fprintf (vect_dump, "not vectorized: iteration count too small.");
570 if (vect_print_dump_info (REPORT_DETAILS))
571 fprintf (vect_dump,"not vectorized: iteration count smaller than "
572 "vectorization factor.");
576 /* Analyze cost. Decide if worth while to vectorize. */
578 /* Once VF is set, SLP costs should be updated since the number of created
579 vector stmts depends on VF. */
580 vect_update_slp_costs_according_to_vf (loop_vinfo);
582 min_profitable_iters = vect_estimate_min_profitable_iters (loop_vinfo);
583 LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo) = min_profitable_iters;
/* A negative estimate means the vector version is never profitable.  */
585 if (min_profitable_iters < 0)
587 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
588 fprintf (vect_dump, "not vectorized: vectorization not profitable.");
589 if (vect_print_dump_info (REPORT_DETAILS))
590 fprintf (vect_dump, "not vectorized: vector version will never be "
595 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
596 * vectorization_factor) - 1);
598 /* Use the cost model only if it is more conservative than user specified
/* 'th' is the runtime profitability threshold: the larger of the
   user-specified bound and the cost model's estimate.  */
601 th = (unsigned) min_scalar_loop_bound;
602 if (min_profitable_iters
603 && (!min_scalar_loop_bound
604 || min_profitable_iters > min_scalar_loop_bound))
605 th = (unsigned) min_profitable_iters;
607 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
608 && LOOP_VINFO_INT_NITERS (loop_vinfo) <= th)
610 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
611 fprintf (vect_dump, "not vectorized: vectorization not "
613 if (vect_print_dump_info (REPORT_DETAILS))
614 fprintf (vect_dump, "not vectorized: iteration count smaller than "
615 "user specified loop bound parameter or minimum "
616 "profitable iterations (whichever is more conservative).");
/* An epilogue loop is needed when the trip count is unknown, not a
   multiple of VF, or when peeling for alignment is used; check that the
   loop structure allows creating one.  */
620 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
621 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
622 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
624 if (vect_print_dump_info (REPORT_DETAILS))
625 fprintf (vect_dump, "epilog loop required.");
626 if (!vect_can_advance_ivs_p (loop_vinfo))
628 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
630 "not vectorized: can't create epilog loop 1.");
633 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
635 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
637 "not vectorized: can't create epilog loop 2.");
646 /* Function exist_non_indexing_operands_for_use_p
648 USE is one of the uses attached to STMT. Check if USE is
649 used in STMT for anything other than indexing an array. */
/* NOTE(review): the return statements of this function are elided from this
   excerpt; comments describe only the visible logic.  */
652 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
655 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
657 /* USE corresponds to some operand in STMT. If there is no data
658 reference in STMT, then any operand that corresponds to USE
659 is not indexing an array. */
660 if (!STMT_VINFO_DATA_REF (stmt_info))
663 /* STMT has a data_ref. FORNOW this means that its of one of
667 (This should have been verified in analyze_data_refs).
669 'var' in the second case corresponds to a def, not a use,
670 so USE cannot correspond to any operands that are not used
673 Therefore, all we need to check is if STMT falls into the
674 first case, and whether var corresponds to USE. */
/* If the lhs is an SSA name, STMT is a load ('var = array_ref'); only
   then can the rhs operand be a non-indexing use of USE.  */
676 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
679 operand = GIMPLE_STMT_OPERAND (stmt, 1);
681 if (TREE_CODE (operand) != SSA_NAME)
691 /* Function vect_analyze_scalar_cycles_1.
693 Examine the cross iteration def-use cycles of scalar variables
694 in LOOP. LOOP_VINFO represents the loop that is noe being
695 considered for vectorization (can be LOOP, or an outer-loop
/* NOTE(review): excerpt is non-contiguous; some braces and 'else' arms of
   this function are not visible here.  */
699 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
702 basic_block bb = loop->header;
704 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
706 if (vect_print_dump_info (REPORT_DETAILS))
707 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
709 /* First - identify all inductions. */
710 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
712 tree access_fn = NULL;
713 tree def = PHI_RESULT (phi);
714 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
716 if (vect_print_dump_info (REPORT_DETAILS))
718 fprintf (vect_dump, "Analyze phi: ");
719 print_generic_expr (vect_dump, phi, TDF_SLIM);
722 /* Skip virtual phi's. The data dependences that are associated with
723 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
724 if (!is_gimple_reg (SSA_NAME_VAR (def)))
/* Until classified below, the cycle is unknown.  */
727 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
729 /* Analyze the evolution function. */
730 access_fn = analyze_scalar_evolution (loop, def);
731 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
733 fprintf (vect_dump, "Access function of PHI: ");
734 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* PHIs that are not simple inductions are deferred to the reduction
   pass below via the worklist.  */
738 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
740 VEC_safe_push (tree, heap, worklist, phi);
744 if (vect_print_dump_info (REPORT_DETAILS))
745 fprintf (vect_dump, "Detected induction.");
746 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
750 /* Second - identify all reductions. */
751 while (VEC_length (tree, worklist) > 0)
753 tree phi = VEC_pop (tree, worklist);
754 tree def = PHI_RESULT (phi);
755 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
758 if (vect_print_dump_info (REPORT_DETAILS))
760 fprintf (vect_dump, "Analyze phi: ");
761 print_generic_expr (vect_dump, phi, TDF_SLIM);
764 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
765 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
/* If the cycle matches a reduction idiom, mark both the PHI and the
   reduction stmt.  */
767 reduc_stmt = vect_is_simple_reduction (loop_vinfo, phi);
770 if (vect_print_dump_info (REPORT_DETAILS))
771 fprintf (vect_dump, "Detected reduction.");
772 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
773 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
777 if (vect_print_dump_info (REPORT_DETAILS))
778 fprintf (vect_dump, "Unknown def-use cycle pattern.");
781 VEC_free (tree, heap, worklist);
786 /* Function vect_analyze_scalar_cycles.
788 Examine the cross iteration def-use cycles of scalar variables, by
789 analyzing the loop-header PHIs of scalar variables; Classify each
790 cycle as one of the following: invariant, induction, reduction, unknown.
791 We do that for the loop represented by LOOP_VINFO, and also to its
792 inner-loop, if exists.
793 Examples for scalar cycles:
808 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
810 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Classify the cycles of the loop under analysis...  */
812 vect_analyze_scalar_cycles_1 (loop_vinfo, loop);
814 /* When vectorizing an outer-loop, the inner-loop is executed sequentially.
815 Reductions in such inner-loop therefore have different properties than
816 the reductions in the nest that gets vectorized:
817 1. When vectorized, they are executed in the same order as in the original
818 scalar loop, so we can't change the order of computation when
820 2. FIXME: Inner-loop reductions can be used in the inner-loop, so the
821 current checks are too strict. */
/* ...and of the inner loop, when one exists (the guard on loop->inner is
   elided from this excerpt).  */
824 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
828 /* Function vect_insert_into_interleaving_chain.
830 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
833 vect_insert_into_interleaving_chain (struct data_reference *dra,
834 struct data_reference *drb)
836 tree prev, next, next_init;
837 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
838 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
/* Walk DRB's chain (sorted by DR_INIT) to find the insertion point.
   NOTE(review): the loop construct enclosing this walk is partly elided
   from this excerpt.  */
840 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
841 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
844 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
/* Found the first element with a larger init - link DRA in before it.  */
845 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
848 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
849 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
853 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
856 /* We got to the end of the list. Insert here. */
857 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
858 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
862 /* Function vect_update_interleaving_chain.
864 For two data-refs DRA and DRB that are a part of a chain interleaved data
865 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
867 There are four possible cases:
868 1. New stmts - both DRA and DRB are not a part of any chain:
871 2. DRB is a part of a chain and DRA is not:
872 no need to update FIRST_DR
873 no need to insert DRB
874 insert DRA according to init
875 3. DRA is a part of a chain and DRB is not:
876 if (init of FIRST_DR > init of DRB)
878 NEXT(FIRST_DR) = previous FIRST_DR
880 insert DRB according to its init
881 4. both DRA and DRB are in some interleaving chains:
882 choose the chain with the smallest init of FIRST_DR
883 insert the nodes of the second chain into the first one. */
/* NOTE(review): non-contiguous excerpt - several braces, 'else' arms and
   early returns of this function are not visible here.  */
886 vect_update_interleaving_chain (struct data_reference *drb,
887 struct data_reference *dra)
889 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
890 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
891 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
892 tree node, prev, next, node_init, first_stmt;
894 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
895 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
/* DRB (the smaller init) becomes the chain head for both.  */
897 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
898 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
899 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
903 /* 2. DRB is a part of a chain and DRA is not. */
904 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
906 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
907 /* Insert DRA into the chain of DRB. */
908 vect_insert_into_interleaving_chain (dra, drb);
912 /* 3. DRA is a part of a chain and DRB is not. */
913 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
915 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
916 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
920 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
922 /* DRB's init is smaller than the init of the stmt previously marked
923 as the first stmt of the interleaving chain of DRA. Therefore, we
924 update FIRST_STMT and put DRB in the head of the list. */
925 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
926 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
928 /* Update all the stmts in the list to point to the new FIRST_STMT. */
929 tmp = old_first_stmt;
932 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
933 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
938 /* Insert DRB in the list of DRA. */
939 vect_insert_into_interleaving_chain (drb, dra);
940 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
945 /* 4. both DRA and DRB are in some interleaving chains. */
946 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
947 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
/* Same chain already - nothing to merge.  */
948 if (first_a == first_b)
950 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
951 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
/* Merge into whichever chain has the smaller head init.  */
953 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
955 /* Insert the nodes of DRA chain into the DRB chain.
956 After inserting a node, continue from this node of the DRB chain (don't
957 start from the beginning. */
958 node = DR_GROUP_FIRST_DR (stmtinfo_a);
959 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
960 first_stmt = first_b;
964 /* Insert the nodes of DRB chain into the DRA chain.
965 After inserting a node, continue from this node of the DRA chain (don't
966 start from the beginning. */
967 node = DR_GROUP_FIRST_DR (stmtinfo_b);
968 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
969 first_stmt = first_a;
/* Splice each node of the donor chain into the target chain, keeping
   the list sorted by DR_INIT.  */
974 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
975 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
978 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
979 if (tree_int_cst_compare (next_init, node_init) > 0)
982 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
983 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
988 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
992 /* We got to the end of the list. Insert here. */
993 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
994 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
/* Re-point the moved node at the merged chain's head and advance.  */
997 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
998 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
1003 /* Function vect_equal_offsets.
1005 Check if OFFSET1 and OFFSET2 are identical expressions. */
1008 vect_equal_offsets (tree offset1, tree offset2)
/* Strip no-op conversions before comparing.  */
1012 STRIP_NOPS (offset1);
1013 STRIP_NOPS (offset2);
/* Identical trees are trivially equal (the return on this path is elided
   from this excerpt).  */
1015 if (offset1 == offset2)
/* Only same-code binary expressions are compared structurally; anything
   else is considered unequal.  */
1018 if (TREE_CODE (offset1) != TREE_CODE (offset2)
1019 || !BINARY_CLASS_P (offset1)
1020 || !BINARY_CLASS_P (offset2))
/* Recurse on both operands.  */
1023 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
1024 TREE_OPERAND (offset2, 0));
1025 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
1026 TREE_OPERAND (offset2, 1));
1028 return (res0 && res1);
1032 /* Function vect_check_interleaving.
1034 Check if DRA and DRB are a part of interleaving. In case they are, insert
1035 DRA and DRB in an interleaving chain. */
/* NOTE(review): the early returns of this function are elided from this
   excerpt.  */
1038 vect_check_interleaving (struct data_reference *dra,
1039 struct data_reference *drb)
1041 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
1043 /* Check that the data-refs have same first location (except init) and they
1044 are both either store or load (not load and store). */
1045 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
1046 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
1047 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
1048 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
1049 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
1050 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
1051 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
1052 || DR_IS_READ (dra) != DR_IS_READ (drb))
1056 1. data-refs are of the same type
1057 2. their steps are equal
1058 3. the step is greater than the difference between data-refs' inits */
1059 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
1060 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
/* tree_int_cst_compare returning nonzero means the steps differ.  */
1062 if (type_size_a != type_size_b
1063 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
1064 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
1065 TREE_TYPE (DR_REF (drb))))
1068 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
1069 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
1070 step = TREE_INT_CST_LOW (DR_STEP (dra));
/* Orient the check so the smaller init comes first.  */
1072 if (init_a > init_b)
1074 /* If init_a == init_b + the size of the type * k, we have an interleaving,
1075 and DRB is accessed before DRA. */
1076 diff_mod_size = (init_a - init_b) % type_size_a;
/* Gap larger than the step cannot be part of one interleaved group.  */
1078 if ((init_a - init_b) > step)
1081 if (diff_mod_size == 0)
1083 vect_update_interleaving_chain (drb, dra);
1084 if (vect_print_dump_info (REPORT_DR_DETAILS))
1086 fprintf (vect_dump, "Detected interleaving ");
1087 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1088 fprintf (vect_dump, " and ");
1089 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1096 /* If init_b == init_a + the size of the type * k, we have an
1097 interleaving, and DRA is accessed before DRB. */
1098 diff_mod_size = (init_b - init_a) % type_size_a;
1100 if ((init_b - init_a) > step)
1103 if (diff_mod_size == 0)
1105 vect_update_interleaving_chain (dra, drb);
1106 if (vect_print_dump_info (REPORT_DR_DETAILS))
1108 fprintf (vect_dump, "Detected interleaving ");
1109 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1110 fprintf (vect_dump, " and ");
1111 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1118 /* Check if data references pointed by DR_I and DR_J are same or
1119 belong to same interleaving group. Return FALSE if drs are
1120 different, otherwise return TRUE. */
1123 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
1125 tree stmt_i = DR_STMT (dr_i);
1126 tree stmt_j = DR_STMT (dr_j);
/* Same when the referenced expressions are structurally equal, or when
   both stmts belong to interleaving groups with the same first stmt.
   (The return statements are elided from this excerpt.)  */
1128 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
1129 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1130 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
1131 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
1132 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
1138 /* If address ranges represented by DDR_I and DDR_J are equal,
1139 return TRUE, otherwise return FALSE. */
/* NOTE(review): return type, braces and return statements are elided in
   this listing.  The visible condition treats the two data-ref pairs as
   unordered: (A,A)&(B,B) or cross-matched (A,B)&(B,A).  */
1142 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
1144 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
1145 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
1146 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
1147 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
1153 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
1154 tested at run-time. Return TRUE if DDR was successfully inserted.
1155 Return false if versioning is not supported. */
/* NOTE(review): several lines are elided in this listing (braces, the
   optimize-for-size and outer-loop conditions, and return statements);
   comments below only cover what is visible.  */
1158 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
1160 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Bail out when the run-time alias-check budget is disabled (param 0).  */
1162 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
1165 if (vect_print_dump_info (REPORT_DR_DETAILS))
1167 fprintf (vect_dump, "mark for run-time aliasing test between ");
1168 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
1169 fprintf (vect_dump, " and ");
1170 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
/* The guard condition for this diagnostic (presumably optimize_size) is
   not shown in the listing — confirm against the full source.  */
1175 if (vect_print_dump_info (REPORT_DR_DETAILS))
1176 fprintf (vect_dump, "versioning not supported when optimizing for size.")
;
1180 /* FORNOW: We don't support versioning with outer-loop vectorization. */
1183 if (vect_print_dump_info (REPORT_DR_DETAILS))
1184 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
/* Record the ddr for a run-time alias check on LOOP_VINFO's list.  */
1188 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
1192 /* Function vect_analyze_data_ref_dependence.
1194 Return TRUE if there (might) exist a dependence between a memory-reference
1195 DRA and a memory-reference DRB. When versioning for alias may check a
1196 dependence at run-time, return FALSE. */
/* NOTE(review): this listing is elided — braces, some declarations
   (e.g. the loop counter `i`), `else` lines and several returns are not
   shown.  Comments only describe visible code.  */
1199 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
1200 loop_vec_info loop_vinfo)
1203 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1204 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1205 struct data_reference *dra = DDR_A (ddr);
1206 struct data_reference *drb = DDR_B (ddr);
1207 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1208 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1209 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1210 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1211 lambda_vector dist_v;
1212 unsigned int loop_depth;
/* Known-independent accesses: still check for interleaving so group
   info gets recorded.  */
1214 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1216 /* Independent data accesses. */
1217 vect_check_interleaving (dra, drb);
/* Read-after-read or self-dependence carries no hazard.  */
1221 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
/* Unknown dependence: fall back to a run-time alias test.  */
1224 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1226 if (vect_print_dump_info (REPORT_DR_DETAILS))
1229 "versioning for alias required: can't determine dependence between ");
1230 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1231 fprintf (vect_dump, " and ");
1232 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1234 /* Add to list of ddrs that need to be tested at run-time. */
/* Negated: marking succeeded => FALSE (no blocking dependence).  */
1235 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
/* No distance vectors available: also defer to a run-time test.  */
1238 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1240 if (vect_print_dump_info (REPORT_DR_DETAILS))
1242 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
1243 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1244 fprintf (vect_dump, " and ");
1245 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1247 /* Add to list of ddrs that need to be tested at run-time. */
1248 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
/* Examine the dependence distance of each vector at this loop's depth.  */
1251 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1252 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1254 int dist = dist_v[loop_depth];
1256 if (vect_print_dump_info (REPORT_DR_DETAILS))
1257 fprintf (vect_dump, "dependence distance = %d.", dist);
1259 /* Same loop iteration. */
1260 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1262 /* Two references with distance zero have the same alignment. */
1263 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1264 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1265 if (vect_print_dump_info (REPORT_ALIGNMENT))
1266 fprintf (vect_dump, "accesses have the same alignment.");
1267 if (vect_print_dump_info (REPORT_DR_DETAILS))
1269 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1270 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1271 fprintf (vect_dump, " and ");
1272 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1275 /* For interleaving, mark that there is a read-write dependency if
1276 necessary. We check before that one of the data-refs is store. */
1277 if (DR_IS_READ (dra))
1278 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1281 if (DR_IS_READ (drb))
1282 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
/* A distance >= VF (or a reversed-positive, i.e. actually negative,
   distance) cannot be violated by vectorizing with this VF.  */
1288 if (abs (dist) >= vectorization_factor
1289 || (dist > 0 && DDR_REVERSED_P (ddr)))
1291 /* Dependence distance does not create dependence, as far as
1292 vectorization is concerned, in this case. If DDR_REVERSED_P the
1293 order of the data-refs in DDR was reversed (to make distance
1294 vector positive), and the actual distance is negative. */
1295 if (vect_print_dump_info (REPORT_DR_DETAILS))
1296 fprintf (vect_dump, "dependence distance >= VF or negative.");
/* Otherwise the distance is a real obstacle — report and give up.  */
1300 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1303 "not vectorized, possible dependence "
1304 "between data-refs ");
1305 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1306 fprintf (vect_dump, " and ");
1307 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1316 /* Function vect_analyze_data_ref_dependences.
1318 Examine all the data references in the loop, and make sure there do not
1319 exist any data dependences between them. */
/* NOTE(review): elided listing — return type, braces, the declaration of
   `i` and the returns inside/after the loop are not shown.  Visibly:
   iterate over all ddrs and test each with
   vect_analyze_data_ref_dependence.  */
1322 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1325 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1326 struct data_dependence_relation *ddr;
1328 if (vect_print_dump_info (REPORT_DETAILS))
1329 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1331 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1332 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1339 /* Function vect_compute_data_ref_alignment
1341 Compute the misalignment of the data reference DR.
1344 1. If during the misalignment computation it is found that the data reference
1345 cannot be vectorized then false is returned.
1346 2. DR_MISALIGNMENT (DR) is defined.
1348 FOR NOW: No analysis is actually performed. Misalignment is calculated
1349 only for trivial cases. TODO. */
/* NOTE(review): elided listing — return type, braces, some declarations
   (misalign, vectype, base_aligned) and several returns are not shown.  */
1352 vect_compute_data_ref_alignment (struct data_reference *dr)
1354 tree stmt = DR_STMT (dr);
1355 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1356 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1357 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1358 tree ref = DR_REF (dr);
1360 tree base, base_addr;
1363 tree aligned_to, alignment;
1365 if (vect_print_dump_info (REPORT_DETAILS))
1366 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1368 /* Initialize misalignment to unknown. */
1369 SET_DR_MISALIGNMENT (dr, -1);
/* Start from the data-ref analysis results for this dr.  */
1371 misalign = DR_INIT (dr);
1372 aligned_to = DR_ALIGNED_TO (dr);
1373 base_addr = DR_BASE_ADDRESS (dr);
1375 /* In case the dataref is in an inner-loop of the loop that is being
1376 vectorized (LOOP), we use the base and misalignment information
1377 relative to the outer-loop (LOOP). This is ok only if the misalignment
1378 stays the same throughout the execution of the inner-loop, which is why
1379 we have to check that the stride of the dataref in the inner-loop evenly
1380 divides by the vector size. */
1381 if (nested_in_vect_loop_p (loop, stmt))
1383 tree step = DR_STEP (dr);
1384 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1386 if (dr_step % UNITS_PER_SIMD_WORD == 0)
1388 if (vect_print_dump_info (REPORT_ALIGNMENT))
1389 fprintf (vect_dump, "inner step divides the vector-size.");
/* Use the outer-loop-relative base/misalignment info.  */
1390 misalign = STMT_VINFO_DR_INIT (stmt_info);
1391 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
1392 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
/* (else branch — the `else` line itself is elided in this listing.)  */
1396 if (vect_print_dump_info (REPORT_ALIGNMENT))
1397 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
1398 misalign = NULL_TREE;
1402 base = build_fold_indirect_ref (base_addr);
1403 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment in bytes = alignment of the vector type.  */
1404 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
/* Known alignment weaker than required (or misalign unknown — that part
   of the condition is elided): record "unknown" and bail.  */
1406 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1409 if (vect_print_dump_info (REPORT_ALIGNMENT))
1411 fprintf (vect_dump, "Unknown alignment for access: ");
1412 print_generic_expr (vect_dump, base, TDF_SLIM);
/* Decide whether the base itself is sufficiently aligned — the start of
   this condition (presumably a DECL_P/DECL alignment test) is elided.  */
1418 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1420 || (TREE_CODE (base_addr) == SSA_NAME
1421 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1422 TREE_TYPE (base_addr)))),
1424 base_aligned = true;
1426 base_aligned = false;
1430 /* Do not change the alignment of global variables if
1431 flag_section_anchors is enabled. */
1432 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1433 || (TREE_STATIC (base) && flag_section_anchors))
1435 if (vect_print_dump_info (REPORT_DETAILS))
1437 fprintf (vect_dump, "can't force alignment of ref: ");
1438 print_generic_expr (vect_dump, ref, TDF_SLIM);
1443 /* Force the alignment of the decl.
1444 NOTE: This is the only change to the code we make during
1445 the analysis phase, before deciding to vectorize the loop. */
1446 if (vect_print_dump_info (REPORT_DETAILS))
1447 fprintf (vect_dump, "force alignment");
1448 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1449 DECL_USER_ALIGN (base) = 1;
1452 /* At this point we assume that the base is aligned. */
1453 gcc_assert (base_aligned
1454 || (TREE_CODE (base) == VAR_DECL
1455 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1457 /* Modulo alignment. */
1458 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1460 if (!host_integerp (misalign, 1))
1462 /* Negative or overflowed misalignment value. */
1463 if (vect_print_dump_info (REPORT_DETAILS))
1464 fprintf (vect_dump, "unexpected misalign value");
/* Record the now-known misalignment in bytes.  */
1468 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1470 if (vect_print_dump_info (REPORT_DETAILS))
1472 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1473 print_generic_expr (vect_dump, ref, TDF_SLIM);
1480 /* Function vect_compute_data_refs_alignment
1482 Compute the misalignment of data references in the loop.
1483 Return FALSE if a data reference is found that cannot be vectorized. */
/* NOTE(review): elided listing — return type, braces, `i` declaration and
   returns are not shown.  Visibly: compute alignment for every dataref.  */
1486 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1488 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1489 struct data_reference *dr;
1492 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1493 if (!vect_compute_data_ref_alignment (dr))
1500 /* Function vect_update_misalignment_for_peel
1502 DR - the data reference whose misalignment is to be adjusted.
1503 DR_PEEL - the data reference whose misalignment is being made
1504 zero in the vector loop by the peel.
1505 NPEEL - the number of iterations in the peel loop if the misalignment
1506 of DR_PEEL is known at compile time. */
/* NOTE(review): elided listing — return type, braces, `i` declaration and
   early returns after each case are not shown.  */
1509 vect_update_misalignment_for_peel (struct data_reference *dr,
1510 struct data_reference *dr_peel, int npeel)
1513 VEC(dr_p,heap) *same_align_drs;
1514 struct data_reference *current_dr;
1515 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1516 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1517 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1518 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1520 /* For interleaved data accesses the step in the loop must be multiplied by
1521 the size of the interleaving group. */
1522 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1523 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1524 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1525 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1527 /* It can be assumed that the data refs with the same alignment as dr_peel
1528 are aligned in the vector loop. */
/* (the assigned variable `same_align_drs` appears on the elided line.)  */
1530 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1531 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1533 if (current_dr != dr)
/* Same-alignment set member: the peel also aligns DR — force 0.  */
1535 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1536 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1537 SET_DR_MISALIGNMENT (dr, 0);
/* Both misalignments known: advance DR's misalignment by the peeled
   bytes, modulo the vector size.  */
1541 if (known_alignment_for_access_p (dr)
1542 && known_alignment_for_access_p (dr_peel))
1544 int misal = DR_MISALIGNMENT (dr);
1545 misal += npeel * dr_size;
1546 misal %= UNITS_PER_SIMD_WORD;
1547 SET_DR_MISALIGNMENT (dr, misal);
/* Otherwise the misalignment becomes unknown after peeling.  */
1551 if (vect_print_dump_info (REPORT_DETAILS))
1552 fprintf (vect_dump, "Setting misalignment to -1.");
1553 SET_DR_MISALIGNMENT (dr, -1);
1557 /* Function vect_verify_datarefs_alignment
1559 Return TRUE if all data references in the loop can be
1560 handled with respect to alignment. */
/* NOTE(review): elided listing — return type, braces, `i` declaration and
   return statements are not shown.  */
1563 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1565 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1566 struct data_reference *dr;
1567 enum dr_alignment_support supportable_dr_alignment;
1570 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1572 tree stmt = DR_STMT (dr);
1573 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1575 /* For interleaving, only the alignment of the first access matters. */
1576 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1577 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Query the target: can this (mis)alignment be handled at all?  */
1580 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1581 if (!supportable_dr_alignment)
1583 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1585 if (DR_IS_READ (dr))
1587 "not vectorized: unsupported unaligned load.");
1590 "not vectorized: unsupported unaligned store.");
1594 if (supportable_dr_alignment != dr_aligned
1595 && vect_print_dump_info (REPORT_ALIGNMENT))
1596 fprintf (vect_dump, "Vectorizing an unaligned access.");
1602 /* Function vector_alignment_reachable_p
1604 Return true if vector alignment for DR is reachable by peeling
1605 a few loop iterations. Return false otherwise. */
/* NOTE(review): elided listing — return type, braces and return
   statements are not shown; comments describe visible code only.  */
1608 vector_alignment_reachable_p (struct data_reference *dr)
1610 tree stmt = DR_STMT (dr);
1611 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1612 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1614 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1616 /* For interleaved access we peel only if number of iterations in
1617 the prolog loop ({VF - misalignment}), is a multiple of the
1618 number of the interleaved accesses. */
1619 int elem_size, mis_in_elements;
1620 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1622 /* FORNOW: handle only known alignment. */
1623 if (!known_alignment_for_access_p (dr))
/* Convert byte misalignment to a count of vector elements.  */
1626 elem_size = UNITS_PER_SIMD_WORD / nelements;
1627 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
/* Prolog iteration count must divide evenly by the group size.  */
1629 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1633 /* If misalignment is known at the compile time then allow peeling
1634 only if natural alignment is reachable through peeling. */
1635 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1637 HOST_WIDE_INT elmsize =
1638 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1639 if (vect_print_dump_info (REPORT_DETAILS))
1641 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1642 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1644 if (DR_MISALIGNMENT (dr) % elmsize)
1646 if (vect_print_dump_info (REPORT_DETAILS))
1647 fprintf (vect_dump, "data size does not divide the misalignment.\n");
/* Unknown misalignment: defer to the target hook, which may still
   declare alignment reachable unless the ref is packed.  */
1652 if (!known_alignment_for_access_p (dr))
1654 tree type = (TREE_TYPE (DR_REF (dr)));
1655 tree ba = DR_BASE_OBJECT (dr);
1656 bool is_packed = false;
/* Guard (presumably `if (ba)`) elided in this listing.  */
1659 is_packed = contains_packed_reference (ba);
1661 if (vect_print_dump_info (REPORT_DETAILS))
1662 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1663 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1672 /* Function vect_enhance_data_refs_alignment
1674 This pass will use loop versioning and loop peeling in order to enhance
1675 the alignment of data references in the loop.
1677 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1678 original loop is to be vectorized; Any other loops that are created by
1679 the transformations performed in this pass - are not supposed to be
1680 vectorized. This restriction will be relaxed.
1682 This pass will require a cost model to guide it whether to apply peeling
1683 or versioning or a combination of the two. For example, the scheme that
1684 intel uses when given a loop with several memory accesses, is as follows:
1685 choose one memory access ('p') which alignment you want to force by doing
1686 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1687 other accesses are not necessarily aligned, or (2) use loop versioning to
1688 generate one loop in which all accesses are aligned, and another loop in
1689 which only 'p' is necessarily aligned.
1691 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1692 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1693 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1695 Devising a cost model is the most critical aspect of this work. It will
1696 guide us on which access to peel for, whether to use loop versioning, how
1697 many versions to create, etc. The cost model will probably consist of
1698 generic considerations as well as target specific considerations (on
1699 powerpc for example, misaligned stores are more painful than misaligned
1702 Here are the general steps involved in alignment enhancements:
1704 -- original loop, before alignment analysis:
1705 for (i=0; i<N; i++){
1706 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1707 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1710 -- After vect_compute_data_refs_alignment:
1711 for (i=0; i<N; i++){
1712 x = q[i]; # DR_MISALIGNMENT(q) = 3
1713 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1716 -- Possibility 1: we do loop versioning:
1718 for (i=0; i<N; i++){ # loop 1A
1719 x = q[i]; # DR_MISALIGNMENT(q) = 3
1720 p[i] = y; # DR_MISALIGNMENT(p) = 0
1724 for (i=0; i<N; i++){ # loop 1B
1725 x = q[i]; # DR_MISALIGNMENT(q) = 3
1726 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1730 -- Possibility 2: we do loop peeling:
1731 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1735 for (i = 3; i < N; i++){ # loop 2A
1736 x = q[i]; # DR_MISALIGNMENT(q) = 0
1737 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1740 -- Possibility 3: combination of loop peeling and versioning:
1741 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1746 for (i = 3; i<N; i++){ # loop 3A
1747 x = q[i]; # DR_MISALIGNMENT(q) = 0
1748 p[i] = y; # DR_MISALIGNMENT(p) = 0
1752 for (i = 3; i<N; i++){ # loop 3B
1753 x = q[i]; # DR_MISALIGNMENT(q) = 0
1754 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1758 These loops are later passed to loop_transform to be vectorized. The
1759 vectorizer will use the alignment information to guide the transformation
1760 (whether to generate regular loads/stores, or with special handling for
/* NOTE(review): elided listing — return type, braces, several
   declarations (i, stmt, mis, npeel, stat, mask, vectype and the
   do_versioning initializer line) and the returns are not shown.
   Comments below describe visible code only; see the block comment
   above (original lines 1672-1760) for the overall strategy.  */
1764 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1766 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1767 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1768 enum dr_alignment_support supportable_dr_alignment;
1769 struct data_reference *dr0 = NULL;
1770 struct data_reference *dr;
1772 bool do_peeling = false;
1773 bool do_versioning = false;
1776 stmt_vec_info stmt_info;
1777 int vect_versioning_for_alias_required;
1779 if (vect_print_dump_info (REPORT_DETAILS))
1780 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1782 /* While cost model enhancements are expected in the future, the high level
1783 view of the code at this time is as follows:
1785 A) If there is a misaligned write then see if peeling to align this write
1786 can make all data references satisfy vect_supportable_dr_alignment.
1787 If so, update data structures as needed and return true. Note that
1788 at this time vect_supportable_dr_alignment is known to return false
1789 for a misaligned write.
1791 B) If peeling wasn't possible and there is a data reference with an
1792 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1793 then see if loop versioning checks can be used to make all data
1794 references satisfy vect_supportable_dr_alignment. If so, update
1795 data structures as needed and return true.
1797 C) If neither peeling nor versioning were successful then return false if
1798 any data reference does not satisfy vect_supportable_dr_alignment.
1800 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1802 Note, Possibility 3 above (which is peeling and versioning together) is not
1803 being done at this time. */
1805 /* (1) Peeling to force alignment. */
1807 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1809 + How many accesses will become aligned due to the peeling
1810 - How many accesses will become unaligned due to the peeling,
1811 and the cost of misaligned accesses.
1812 - The cost of peeling (the extra runtime checks, the increase
1815 The scheme we use FORNOW: peel to force the alignment of the first
1816 misaligned store in the loop.
1817 Rationale: misaligned stores are not yet supported.
1819 TODO: Use a cost model. */
/* Pick the peeling candidate: the first misaligned store (dr0 — the
   assignment to dr0 is on an elided line; confirm against full source).  */
1821 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1823 stmt = DR_STMT (dr);
1824 stmt_info = vinfo_for_stmt (stmt);
1826 /* For interleaving, only the alignment of the first access
1828 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1829 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1832 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1834 do_peeling = vector_alignment_reachable_p (dr);
1837 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1838 fprintf (vect_dump, "vector alignment may not be reachable");
1843 vect_versioning_for_alias_required =
1844 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1846 /* Temporarily, if versioning for alias is required, we disable peeling
1847 until we support peeling and versioning. Often peeling for alignment
1848 will require peeling for loop-bound, which in turn requires that we
1849 know how to adjust the loop ivs after the loop. */
1850 if (vect_versioning_for_alias_required
1851 || !vect_can_advance_ivs_p (loop_vinfo)
1852 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
/* --- peeling path (guard `if (do_peeling)` elided) --- */
1859 tree stmt = DR_STMT (dr0);
1860 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1861 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1862 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1864 if (known_alignment_for_access_p (dr0))
1866 /* Since it's known at compile time, compute the number of iterations
1867 in the peeled loop (the peeling factor) for use in updating
1868 DR_MISALIGNMENT values. The peeling factor is the vectorization
1869 factor minus the misalignment as an element count. */
1870 mis = DR_MISALIGNMENT (dr0);
1871 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1872 npeel = nelements - mis;
1874 /* For interleaved data access every iteration accesses all the
1875 members of the group, therefore we divide the number of iterations
1876 by the group size. */
1877 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1878 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1879 npeel /= DR_GROUP_SIZE (stmt_info);
1881 if (vect_print_dump_info (REPORT_DETAILS))
1882 fprintf (vect_dump, "Try peeling by %d", npeel);
1885 /* Ensure that all data refs can be vectorized after the peel. */
1886 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1888 int save_misalignment;
1893 stmt = DR_STMT (dr);
1894 stmt_info = vinfo_for_stmt (stmt);
1895 /* For interleaving, only the alignment of the first access
1897 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1898 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Trial update: apply the peel's effect, test supportability, then
   restore the saved misalignment.  */
1901 save_misalignment = DR_MISALIGNMENT (dr);
1902 vect_update_misalignment_for_peel (dr, dr0, npeel);
1903 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1904 SET_DR_MISALIGNMENT (dr, save_misalignment);
1906 if (!supportable_dr_alignment)
1915 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1916 If the misalignment of DR_i is identical to that of dr0 then set
1917 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1918 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1919 by the peeling factor times the element size of DR_i (MOD the
1920 vectorization factor times the size). Otherwise, the
1921 misalignment of DR_i must be set to unknown. */
1922 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1924 vect_update_misalignment_for_peel (dr, dr0, npeel);
1926 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1927 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1928 SET_DR_MISALIGNMENT (dr0, 0);
1929 if (vect_print_dump_info (REPORT_ALIGNMENT))
1930 fprintf (vect_dump, "Alignment of access forced using peeling.");
1932 if (vect_print_dump_info (REPORT_DETAILS))
1933 fprintf (vect_dump, "Peeling for alignment will be applied.");
1935 stat = vect_verify_datarefs_alignment (loop_vinfo);
1942 /* (2) Versioning to force alignment. */
1944 /* Try versioning if:
1945 1) flag_tree_vect_loop_version is TRUE
1946 2) optimize_size is FALSE
1947 3) there is at least one unsupported misaligned data ref with an unknown
1949 4) all misaligned data refs with a known misalignment are supported, and
1950 5) the number of runtime alignment checks is within reason. */
/* (the `do_versioning =` assignment line is elided before this.)  */
1953 flag_tree_vect_loop_version
1955 && (!loop->inner); /* FORNOW */
/* Guard (`if (do_versioning)`) elided before this loop.  */
1959 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1961 stmt = DR_STMT (dr);
1962 stmt_info = vinfo_for_stmt (stmt);
1964 /* For interleaving, only the alignment of the first access
1966 if (aligned_access_p (dr)
1967 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1968 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1971 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1973 if (!supportable_dr_alignment)
/* Versioning only handles UNKNOWN misalignment, and only up to the
   configured number of run-time checks.  */
1979 if (known_alignment_for_access_p (dr)
1980 || VEC_length (tree,
1981 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1982 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1984 do_versioning = false;
1988 stmt = DR_STMT (dr);
1989 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1990 gcc_assert (vectype);
1992 /* The rightmost bits of an aligned address must be zeros.
1993 Construct the mask needed for this test. For example,
1994 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1995 mask must be 15 = 0xf. */
1996 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1998 /* FORNOW: use the same mask to test all potentially unaligned
1999 references in the loop. The vectorizer currently supports
2000 a single vector size, see the reference to
2001 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2002 vectorization factor is computed. */
2003 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2004 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2005 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
2006 VEC_safe_push (tree, heap,
2007 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
2012 /* Versioning requires at least one misaligned data reference. */
2013 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
2014 do_versioning = false;
2015 else if (!do_versioning)
2016 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
/* Commit to versioning (guard `if (do_versioning)` elided).  */
2021 VEC(tree,heap) *may_misalign_stmts
2022 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2025 /* It can now be assumed that the data references in the statements
2026 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2027 of the loop being vectorized. */
2028 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
2030 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2031 dr = STMT_VINFO_DATA_REF (stmt_info);
2032 SET_DR_MISALIGNMENT (dr, 0);
2033 if (vect_print_dump_info (REPORT_ALIGNMENT))
2034 fprintf (vect_dump, "Alignment of access forced using versioning.");
2037 if (vect_print_dump_info (REPORT_DETAILS))
2038 fprintf (vect_dump, "Versioning for alignment will be applied.");
2040 /* Peeling and versioning can't be done together at this time. */
2041 gcc_assert (! (do_peeling && do_versioning));
2043 stat = vect_verify_datarefs_alignment (loop_vinfo);
2048 /* This point is reached if neither peeling nor versioning is being done. */
2049 gcc_assert (! (do_peeling || do_versioning));
2051 stat = vect_verify_datarefs_alignment (loop_vinfo);
2056 /* Function vect_analyze_data_refs_alignment
2058 Analyze the alignment of the data-references in the loop.
2059 Return FALSE if a data reference is found that cannot be vectorized. */
/* NOTE(review): elided listing — return type, braces and returns are not
   shown.  Visibly: delegate to vect_compute_data_refs_alignment and
   report failure.  */
2062 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
2064 if (vect_print_dump_info (REPORT_DETAILS))
2065 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2067 if (!vect_compute_data_refs_alignment (loop_vinfo))
2069 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2071 "not vectorized: can't calculate alignment for data ref.");
2079 /* Analyze groups of strided accesses: check that DR belongs to a group of
2080 strided accesses of legal size, step, etc. Detect gaps, single element
2081 interleaving, and other special cases. Set strided access info.
2082 Collect groups of strided stores for further use in SLP analysis. */
2085 vect_analyze_group_access (struct data_reference *dr)
2087 tree step = DR_STEP (dr);
2088 tree scalar_type = TREE_TYPE (DR_REF (dr));
2089 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2090 tree stmt = DR_STMT (dr);
2091 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2092 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2093 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2094 HOST_WIDE_INT stride;
2095 bool slp_impossible = false;
2097 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2098 interleaving group (including gaps). */
2099 stride = dr_step / type_size;
2101 /* Not consecutive access is possible only if it is a part of interleaving. */
2102 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2104 /* Check if it this DR is a part of interleaving, and is a single
2105 element of the group that is accessed in the loop. */
2107 /* Gaps are supported only for loads. STEP must be a multiple of the type
2108 size. The size of the group must be a power of 2. */
2110 && (dr_step % type_size) == 0
2112 && exact_log2 (stride) != -1)
2114 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2115 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2116 if (vect_print_dump_info (REPORT_DR_DETAILS))
2118 fprintf (vect_dump, "Detected single element interleaving %d ",
2119 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
2120 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2121 fprintf (vect_dump, " step ");
2122 print_generic_expr (vect_dump, step, TDF_SLIM);
2126 if (vect_print_dump_info (REPORT_DETAILS))
2127 fprintf (vect_dump, "not consecutive access");
2131 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2133 /* First stmt in the interleaving chain. Check the chain. */
2134 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2135 struct data_reference *data_ref = dr;
2136 unsigned int count = 1;
2138 tree prev_init = DR_INIT (data_ref);
2140 HOST_WIDE_INT diff, count_in_bytes;
2144 /* Skip same data-refs. In case that two or more stmts share data-ref
2145 (supported only for loads), we vectorize only the first stmt, and
2146 the rest get their vectorized loads from the first one. */
2147 if (!tree_int_cst_compare (DR_INIT (data_ref),
2148 DR_INIT (STMT_VINFO_DATA_REF (
2149 vinfo_for_stmt (next)))))
2151 if (!DR_IS_READ (data_ref))
2153 if (vect_print_dump_info (REPORT_DETAILS))
2154 fprintf (vect_dump, "Two store stmts share the same dr.");
2158 /* Check that there is no load-store dependencies for this loads
2159 to prevent a case of load-store-load to the same location. */
2160 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2161 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2163 if (vect_print_dump_info (REPORT_DETAILS))
2165 "READ_WRITE dependence in interleaving.");
2169 /* For load use the same data-ref load. */
2170 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2173 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2178 /* Check that all the accesses have the same STEP. */
2179 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2180 if (tree_int_cst_compare (step, next_step))
2182 if (vect_print_dump_info (REPORT_DETAILS))
2183 fprintf (vect_dump, "not consecutive access in interleaving");
2187 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2188 /* Check that the distance between two accesses is equal to the type
2189 size. Otherwise, we have gaps. */
2190 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2191 - TREE_INT_CST_LOW (prev_init)) / type_size;
2194 /* FORNOW: SLP of accesses with gaps is not supported. */
2195 slp_impossible = true;
2196 if (!DR_IS_READ (data_ref))
2198 if (vect_print_dump_info (REPORT_DETAILS))
2199 fprintf (vect_dump, "interleaved store with gaps");
2204 /* Store the gap from the previous member of the group. If there is no
2205 gap in the access, DR_GROUP_GAP is always 1. */
2206 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2208 prev_init = DR_INIT (data_ref);
2209 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2210 /* Count the number of data-refs in the chain. */
2214 /* COUNT is the number of accesses found, we multiply it by the size of
2215 the type to get COUNT_IN_BYTES. */
2216 count_in_bytes = type_size * count;
2218 /* Check that the size of the interleaving is not greater than STEP. */
2219 if (dr_step < count_in_bytes)
2221 if (vect_print_dump_info (REPORT_DETAILS))
2223 fprintf (vect_dump, "interleaving size is greater than step for ");
2224 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2229 /* Check that the size of the interleaving is equal to STEP for stores,
2230 i.e., that there are no gaps. */
2231 if (dr_step != count_in_bytes)
2233 if (DR_IS_READ (dr))
2234 slp_impossible = true;
2237 if (vect_print_dump_info (REPORT_DETAILS))
2238 fprintf (vect_dump, "interleaved store with gaps");
2243 /* Check that STEP is a multiple of type size. */
2244 if ((dr_step % type_size) != 0)
2246 if (vect_print_dump_info (REPORT_DETAILS))
2248 fprintf (vect_dump, "step is not a multiple of type size: step ");
2249 print_generic_expr (vect_dump, step, TDF_SLIM);
2250 fprintf (vect_dump, " size ");
2251 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2257 /* FORNOW: we handle only interleaving that is a power of 2.
2258 We don't fail here if it may be still possible to vectorize the
2259 group using SLP. If not, the size of the group will be checked in
2260 vect_analyze_operations, and the vectorization will fail. */
2261 if (exact_log2 (stride) == -1)
2263 if (vect_print_dump_info (REPORT_DETAILS))
2264 fprintf (vect_dump, "interleaving is not a power of 2");
2269 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2270 if (vect_print_dump_info (REPORT_DETAILS))
2271 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2273 /* SLP: create an SLP data structure for every interleaving group of
2274 stores for further analysis in vect_analyse_slp. */
2275 if (!DR_IS_READ (dr) && !slp_impossible)
2276 VEC_safe_push (tree, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
2283 /* Analyze the access pattern of the data-reference DR.
2284 In case of non-consecutive accesses call vect_analyze_group_access() to
2285 analyze groups of strided accesses. */
/* NOTE(review): this chunk is elided -- the "static bool" line, braces,
   return statements and some statements (e.g. the invariant-access check
   body) are missing between the visible lines.  */
2288 vect_analyze_data_ref_access (struct data_reference *dr)
2290 tree step = DR_STEP (dr);
2291 tree scalar_type = TREE_TYPE (DR_REF (dr));
2292 tree stmt = DR_STMT (dr);
2293 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2294 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2295 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2296 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
/* STEP was not analyzable: reject the access.  */
2300 if (vect_print_dump_info (REPORT_DETAILS))
2301 fprintf (vect_dump, "bad data-ref access");
2305 /* Don't allow invariant accesses. */
2309 if (nested_in_vect_loop_p (loop, stmt))
2311 /* Interleaved accesses are not yet supported within outer-loop
2312 vectorization for references in the inner-loop. */
2313 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2315 /* For the rest of the analysis we use the outer-loop step. */
2316 step = STMT_VINFO_DR_STEP (stmt_info);
2317 dr_step = TREE_INT_CST_LOW (step);
/* Zero step relative to the outer loop: the reference does not advance
   across outer-loop iterations; presumably only loads are acceptable
   here -- TODO confirm against the elided branch body.  */
2321 if (vect_print_dump_info (REPORT_ALIGNMENT))
2322 fprintf (vect_dump, "zero step in outer loop.");
2323 if (DR_IS_READ (dr))
/* Consecutive access: STEP equals the size of the scalar type.  */
2331 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
2333 /* Mark that it is not interleaving. */
2334 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
2338 if (nested_in_vect_loop_p (loop, stmt))
2340 if (vect_print_dump_info (REPORT_ALIGNMENT))
2341 fprintf (vect_dump, "strided access in outer loop.");
2345 /* Not consecutive access - check if it's a part of interleaving group. */
2346 return vect_analyze_group_access (dr);
2350 /* Function vect_analyze_data_ref_accesses.
2352 Analyze the access pattern of all the data references in the loop.
2354 FORNOW: the only access pattern that is considered vectorizable is a
2355 simple step 1 (consecutive) access.
2357 FORNOW: handle only arrays and pointer accesses. */
2360 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
2363 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2364 struct data_reference *dr;
2366 if (vect_print_dump_info (REPORT_DETAILS))
2367 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
/* Validate every data-ref in the loop; a single unanalyzable access
   pattern makes the whole loop unvectorizable.  */
2369 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2370 if (!vect_analyze_data_ref_access (dr))
2372 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2373 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2380 /* Function vect_prune_runtime_alias_test_list.
2382 Prune a list of ddrs to be tested at run-time by versioning for alias.
2383 Return FALSE if resulting list of ddrs is longer then allowed by
2384 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2387 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2389 VEC (ddr_p, heap) * ddrs =
2390 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)
2393 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
/* Outer loop: I is advanced only when DDR_I is kept (elided in this view);
   when DDR_I duplicates an earlier range it is removed and I stays put.  */
2396 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2401 ddr_i = VEC_index (ddr_p, ddrs, i);
/* Compare DDR_I against every ddr already accepted (indices < I).  */
2404 for (j = 0; j < i; j++)
2406 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2408 if (vect_vfa_range_equal (ddr_i, ddr_j))
2410 if (vect_print_dump_info (REPORT_DR_DETAILS))
2412 fprintf (vect_dump, "found equal ranges ");
2413 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2414 fprintf (vect_dump, ", ");
2415 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2416 fprintf (vect_dump, " and ");
2417 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2418 fprintf (vect_dump, ", ");
2419 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
/* Duplicate run-time check: drop DDR_I, keeping vector order.  */
2428 VEC_ordered_remove (ddr_p, ddrs, i);
/* Too many residual checks: give up on versioning-for-alias entirely.  */
2434 if (VEC_length (ddr_p, ddrs) >
2435 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2437 if (vect_print_dump_info (REPORT_DR_DETAILS))
2440 "disable versioning for alias - max number of generated "
2441 "checks exceeded.");
2444 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2452 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */
2455 vect_free_slp_tree (slp_tree node)
/* Post-order release: free both children before this node's vectors.  */
2460 if (SLP_TREE_LEFT (node))
2461 vect_free_slp_tree (SLP_TREE_LEFT (node));
2463 if (SLP_TREE_RIGHT (node))
2464 vect_free_slp_tree (SLP_TREE_RIGHT (node));
2466 VEC_free (tree, heap, SLP_TREE_SCALAR_STMTS (node));
2468 if (SLP_TREE_VEC_STMTS (node))
2469 VEC_free (tree, heap, SLP_TREE_VEC_STMTS (node));
2475 /* Get the defs for the RHS (collect them in DEF_STMTS0/1), check that they are
2476 of a legal type and that they match the defs of the first stmt of the SLP
2477 group (stored in FIRST_STMT_...). */
2480 vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, slp_tree slp_node,
2481 tree rhs, VEC (tree, heap) **def_stmts0,
2482 VEC (tree, heap) **def_stmts1,
2483 enum vect_def_type *first_stmt_dt0,
2484 enum vect_def_type *first_stmt_dt1,
2485 tree *first_stmt_def0_type,
2486 tree *first_stmt_def1_type,
2487 tree *first_stmt_const_oprnd,
2488 int ncopies_for_cost)
2491 enum operation_type op_type = TREE_OPERAND_LENGTH (rhs);
2492 unsigned int i, number_of_oprnds = op_type;
2494 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2495 stmt_vec_info stmt_info =
2496 vinfo_for_stmt (VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0));
/* NOTE(review): the condition guarding this (presumably a store RHS with a
   single operand) is elided from this view -- confirm against full file.  */
2500 number_of_oprnds = 1;
2502 gcc_assert (op_type == unary_op || op_type == binary_op);
2504 for (i = 0; i < number_of_oprnds; i++)
2507 oprnd = TREE_OPERAND (rhs, i);
/* Every operand must be a recognizable def (constant, invariant or
   loop-def); otherwise SLP build fails for this group.  */
2511 if (!vect_is_simple_use (oprnd, loop_vinfo, &def_stmt, &def, &dt[i])
2512 || (!def_stmt && dt[i] != vect_constant_def))
2514 if (vect_print_dump_info (REPORT_SLP))
2516 fprintf (vect_dump, "Build SLP failed: can't find def for ");
2517 print_generic_expr (vect_dump, oprnd, TDF_SLIM);
/* First statement of the group: record operand 0's def kind/type so
   later statements can be checked for isomorphism against it.  */
2523 if (!*first_stmt_dt0)
2525 /* op0 of the first stmt of the group - store its info. */
2526 *first_stmt_dt0 = dt[i];
2528 *first_stmt_def0_type = TREE_TYPE (def);
2530 *first_stmt_const_oprnd = oprnd;
2532 /* Analyze costs (for the first stmt of the group only). */
2534 /* Not memory operation (we don't call this functions for loads). */
2535 vect_model_simple_cost (stmt_info, ncopies_for_cost, dt, slp_node);
2538 vect_model_store_cost (stmt_info, ncopies_for_cost, dt[0], slp_node);
2543 if (!*first_stmt_dt1 && i == 1)
2545 /* op1 of the first stmt of the group - store its info. */
2546 *first_stmt_dt1 = dt[i];
2548 *first_stmt_def1_type = TREE_TYPE (def);
2551 /* We assume that the stmt contains only one constant
2552 operand. We fail otherwise, to be on the safe side. */
2553 if (*first_stmt_const_oprnd)
2555 if (vect_print_dump_info (REPORT_SLP))
2556 fprintf (vect_dump, "Build SLP failed: two constant "
2560 *first_stmt_const_oprnd = oprnd;
2565 /* Not first stmt of the group, check that the def-stmt/s match
2566 the def-stmt/s of the first stmt. */
/* NOTE(review): the head of this compound condition (operand-index
   dispatch) is elided; only the per-operand mismatch tests remain.  */
2568 && (*first_stmt_dt0 != dt[i]
2569 || (*first_stmt_def0_type && def
2570 && *first_stmt_def0_type != TREE_TYPE (def))))
2572 && (*first_stmt_dt1 != dt[i]
2573 || (*first_stmt_def1_type && def
2574 && *first_stmt_def1_type != TREE_TYPE (def))))
2576 && TREE_TYPE (*first_stmt_const_oprnd)
2577 != TREE_TYPE (oprnd)))
2579 if (vect_print_dump_info (REPORT_SLP))
2580 fprintf (vect_dump, "Build SLP failed: different types ");
2587 /* Check the types of the definitions. */
2590 case vect_constant_def:
2591 case vect_invariant_def:
/* Loop defs: queue the def stmt on the matching child list so the
   caller can recurse and build a child SLP node from it.  */
2596 VEC_safe_push (tree, heap, *def_stmts0, def_stmt);
2598 VEC_safe_push (tree, heap, *def_stmts1, def_stmt);
2602 /* FORNOW: Not supported. */
2603 if (vect_print_dump_info (REPORT_SLP))
2605 fprintf (vect_dump, "Build SLP failed: illegal type of def ");
2606 print_generic_expr (vect_dump, def, TDF_SLIM);
2617 /* Recursively build an SLP tree starting from NODE.
2618 Fail (and return FALSE) if def-stmts are not isomorphic, require data
2619 permutation or are of unsupported types of operation. Otherwise, return
2621 SLP_IMPOSSIBLE is TRUE if it is impossible to SLP in the loop, for example
2622 in the case of multiple types for now. */
2625 vect_build_slp_tree (loop_vec_info loop_vinfo, slp_tree *node,
2626 unsigned int group_size, bool *slp_impossible,
2627 int *inside_cost, int *outside_cost,
2628 int ncopies_for_cost)
2630 VEC (tree, heap) *def_stmts0 = VEC_alloc (tree, heap, group_size);
2631 VEC (tree, heap) *def_stmts1 = VEC_alloc (tree, heap, group_size);
2633 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (*node);
2634 tree stmt = VEC_index (tree, stmts, 0);
2635 enum vect_def_type first_stmt_dt0 = 0, first_stmt_dt1 = 0;
2636 enum tree_code first_stmt_code = 0;
2637 tree first_stmt_def1_type = NULL_TREE, first_stmt_def0_type = NULL_TREE;
2638 tree lhs, rhs, prev_stmt = NULL_TREE;
2639 bool stop_recursion = false, need_same_oprnds = false;
2640 tree vectype, scalar_type, first_op1 = NULL_TREE;
2641 unsigned int vectorization_factor = 0, ncopies;
2644 enum machine_mode optab_op2_mode;
2645 enum machine_mode vec_mode;
2646 tree first_stmt_const_oprnd = NULL_TREE;
2647 struct data_reference *first_dr;
2649 /* For every stmt in NODE find its def stmt/s. */
2650 for (i = 0; VEC_iterate (tree, stmts, i, stmt); i++)
2652 if (vect_print_dump_info (REPORT_SLP))
2654 fprintf (vect_dump, "Build SLP for ");
2655 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Only plain assignments can be packed.  */
2658 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2660 if (vect_print_dump_info (REPORT_SLP))
2662 fprintf (vect_dump, "Build SLP failed: not MODIFY_STMT ");
2663 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2669 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2670 vectype = get_vectype_for_scalar_type (scalar_type);
2673 if (vect_print_dump_info (REPORT_SLP))
2675 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2676 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2681 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2682 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2683 ncopies = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
/* NCOPIES > 1 means multiple vector types -- unsupported for SLP (FORNOW),
   and in that case loop-based vectorization is ruled out too.  */
2687 if (vect_print_dump_info (REPORT_SLP))
2688 fprintf (vect_dump, "SLP failed - multiple types ");
2690 *slp_impossible = true;
2694 lhs = GIMPLE_STMT_OPERAND (stmt, 0);
2695 rhs = GIMPLE_STMT_OPERAND (stmt, 1);
2697 /* Check the operation. */
/* First statement of the group: remember its tree code; all later
   statements must use the same operation.  */
2700 first_stmt_code = TREE_CODE (rhs);
2702 /* Shift arguments should be equal in all the packed stmts for a
2703 vector shift with scalar shift operand. */
2704 if (TREE_CODE (rhs) == LSHIFT_EXPR || TREE_CODE (rhs) == RSHIFT_EXPR)
2706 vec_mode = TYPE_MODE (vectype);
2707 optab = optab_for_tree_code (TREE_CODE (rhs), vectype);
2710 if (vect_print_dump_info (REPORT_SLP))
2711 fprintf (vect_dump, "Build SLP failed: no optab.");
2714 icode = (int) optab->handlers[(int) vec_mode].insn_code;
2715 if (icode == CODE_FOR_nothing)
2717 if (vect_print_dump_info (REPORT_SLP))
2719 "Build SLP failed: op not supported by target.");
2722 optab_op2_mode = insn_data[icode].operand[2].mode;
2723 if (!VECTOR_MODE_P (optab_op2_mode))
/* Target takes a scalar shift amount: every stmt in the group must
   then shift by the identical operand.  */
2725 need_same_oprnds = true;
2726 first_op1 = TREE_OPERAND (rhs, 1);
/* Subsequent statements: must match the first stmt's operation.  */
2732 if (first_stmt_code != TREE_CODE (rhs))
2734 if (vect_print_dump_info (REPORT_SLP))
2737 "Build SLP failed: different operation in stmt ");
2738 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2744 if (need_same_oprnds
2745 && !operand_equal_p (first_op1, TREE_OPERAND (rhs, 1), 0))
2747 if (vect_print_dump_info (REPORT_SLP))
2750 "Build SLP failed: different shift arguments in ");
2751 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2758 /* Strided store or load. */
2759 if (STMT_VINFO_STRIDED_ACCESS (vinfo_for_stmt (stmt)))
/* LHS is a memory reference => strided store: analyze its RHS defs.  */
2761 if (REFERENCE_CLASS_P (lhs))
2764 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs,
2765 &def_stmts0, &def_stmts1,
2768 &first_stmt_def0_type,
2769 &first_stmt_def1_type,
2770 &first_stmt_const_oprnd,
2779 /* First stmt of the SLP group should be the first load of
2780 the interleaving loop if data permutation is not
2782 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) != stmt)
2784 /* FORNOW: data permutations are not supported. */
2785 if (vect_print_dump_info (REPORT_SLP))
2787 fprintf (vect_dump, "Build SLP failed: strided "
2788 " loads need permutation ");
2789 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2795 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
2796 if (vect_supportable_dr_alignment (first_dr)
2797 == dr_unaligned_unsupported)
2799 if (vect_print_dump_info (REPORT_SLP))
2801 fprintf (vect_dump, "Build SLP failed: unsupported "
2802 " unaligned load ");
2803 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2809 /* Analyze costs (for the first stmt in the group). */
2810 vect_model_load_cost (vinfo_for_stmt (stmt),
2811 ncopies_for_cost, *node);
/* Later loads of the group must follow interleaving-chain order,
   otherwise a (unsupported) permutation would be required.  */
2815 if (DR_GROUP_NEXT_DR (vinfo_for_stmt (prev_stmt)) != stmt)
2817 /* FORNOW: data permutations are not supported. */
2818 if (vect_print_dump_info (REPORT_SLP))
2820 fprintf (vect_dump, "Build SLP failed: strided "
2821 " loads need permutation ");
2822 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2830 /* We stop the tree when we reach a group of loads. */
2831 stop_recursion = true;
2834 } /* Strided access. */
2837 if (REFERENCE_CLASS_P (rhs))
2839 /* Not strided load. */
2840 if (vect_print_dump_info (REPORT_SLP))
2842 fprintf (vect_dump, "Build SLP failed: not strided load ");
2843 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2846 /* FORNOW: Not strided loads are not supported. */
2850 /* Not memory operation. */
2851 if (!BINARY_CLASS_P (rhs) && !UNARY_CLASS_P (rhs))
2853 if (vect_print_dump_info (REPORT_SLP))
2855 fprintf (vect_dump, "Build SLP failed: operation");
2856 fprintf (vect_dump, " unsupported ");
2857 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2863 /* Find the def-stmts. */
2864 if (!vect_get_and_check_slp_defs (loop_vinfo, *node, rhs, &def_stmts0,
2865 &def_stmts1, &first_stmt_dt0,
2867 &first_stmt_def0_type,
2868 &first_stmt_def1_type,
2869 &first_stmt_const_oprnd,
2875 /* Add the costs of the node to the overall instance costs. */
2876 *inside_cost += SLP_TREE_INSIDE_OF_LOOP_COST (*node);
2877 *outside_cost += SLP_TREE_OUTSIDE_OF_LOOP_COST (*node);
2879 /* Strided loads were reached - stop the recursion. */
2883 /* Create SLP_TREE nodes for the definition node/s. */
2884 if (first_stmt_dt0 == vect_loop_def)
2886 slp_tree left_node = XNEW (struct _slp_tree);
2887 SLP_TREE_SCALAR_STMTS (left_node) = def_stmts0;
2888 SLP_TREE_VEC_STMTS (left_node) = NULL;
2889 SLP_TREE_LEFT (left_node) = NULL;
2890 SLP_TREE_RIGHT (left_node) = NULL;
2891 SLP_TREE_OUTSIDE_OF_LOOP_COST (left_node) = 0;
2892 SLP_TREE_INSIDE_OF_LOOP_COST (left_node) = 0;
/* Recurse on operand-0 defs; costs accumulate into the same totals.  */
2893 if (!vect_build_slp_tree (loop_vinfo, &left_node, group_size,
2894 slp_impossible, inside_cost, outside_cost,
2898 SLP_TREE_LEFT (*node) = left_node;
2901 if (first_stmt_dt1 == vect_loop_def)
2903 slp_tree right_node = XNEW (struct _slp_tree);
2904 SLP_TREE_SCALAR_STMTS (right_node) = def_stmts1;
2905 SLP_TREE_VEC_STMTS (right_node) = NULL;
2906 SLP_TREE_LEFT (right_node) = NULL;
2907 SLP_TREE_RIGHT (right_node) = NULL;
2908 SLP_TREE_OUTSIDE_OF_LOOP_COST (right_node) = 0;
2909 SLP_TREE_INSIDE_OF_LOOP_COST (right_node) = 0;
/* Recurse on operand-1 defs (binary operations only).  */
2910 if (!vect_build_slp_tree (loop_vinfo, &right_node, group_size,
2911 slp_impossible, inside_cost, outside_cost,
2915 SLP_TREE_RIGHT (*node) = right_node;
/* Dump the SLP tree rooted at NODE to VECT_DUMP, pre-order (node first,
   then left and right subtrees).  */
2923 vect_print_slp_tree (slp_tree node)
2931 fprintf (vect_dump, "node ");
2932 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2934 fprintf (vect_dump, "\n\tstmt %d ", i);
2935 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2937 fprintf (vect_dump, "\n");
2939 vect_print_slp_tree (SLP_TREE_LEFT (node));
2940 vect_print_slp_tree (SLP_TREE_RIGHT (node));
2944 /* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
2945 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
2946 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
2947 stmts in NODE are to be marked. */
2950 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
/* J < 0 marks every stmt; otherwise only the stmt at index J.  */
2958 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
2959 if (j < 0 || i == j)
2960 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
2962 vect_mark_slp_stmts (SLP_TREE_LEFT (node), mark, j);
2963 vect_mark_slp_stmts (SLP_TREE_RIGHT (node), mark, j);
2967 /* Analyze an SLP instance starting from a group of strided stores. Call
2968 vect_build_slp_tree to build a tree of packed stmts if possible.
2969 Return FALSE if it's impossible to SLP any stmt in the loop. */
2972 vect_analyze_slp_instance (loop_vec_info loop_vinfo, tree stmt)
2974 slp_instance new_instance;
2975 slp_tree node = XNEW (struct _slp_tree);
2976 unsigned int group_size = DR_GROUP_SIZE (vinfo_for_stmt (stmt));
2977 unsigned int unrolling_factor = 1, nunits;
2978 tree vectype, scalar_type, next;
2979 unsigned int vectorization_factor = 0, ncopies;
2980 bool slp_impossible = false;
2981 int inside_cost = 0, outside_cost = 0, ncopies_for_cost;
2983 /* FORNOW: multiple types are not supported. */
2984 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))));
2985 vectype = get_vectype_for_scalar_type (scalar_type);
2988 if (vect_print_dump_info (REPORT_SLP))
2990 fprintf (vect_dump, "Build SLP failed: unsupported data-type ");
2991 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
2996 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2997 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2998 ncopies = vectorization_factor / nunits;
/* NCOPIES > 1 would require multiple vector types -- unsupported.  */
3001 if (vect_print_dump_info (REPORT_SLP))
3002 fprintf (vect_dump, "SLP failed - multiple types ");
3007 /* Create a node (a root of the SLP tree) for the packed strided stores. */
3008 SLP_TREE_SCALAR_STMTS (node) = VEC_alloc (tree, heap, group_size);
3010 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
/* Walk the interleaving chain starting at STMT (loop header elided).  */
3013 VEC_safe_push (tree, heap, SLP_TREE_SCALAR_STMTS (node), next);
3014 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
3017 SLP_TREE_VEC_STMTS (node) = NULL;
3018 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
3019 SLP_TREE_LEFT (node) = NULL;
3020 SLP_TREE_RIGHT (node) = NULL;
3021 SLP_TREE_OUTSIDE_OF_LOOP_COST (node) = 0;
3022 SLP_TREE_INSIDE_OF_LOOP_COST (node) = 0;
3024 /* Calculate the unrolling factor. */
3025 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
3027 /* Calculate the number of vector stmts to create based on the unrolling
3028 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
3029 GROUP_SIZE / NUNITS otherwise. */
3030 ncopies_for_cost = unrolling_factor * group_size / nunits;
3032 /* Build the tree for the SLP instance. */
3033 if (vect_build_slp_tree (loop_vinfo, &node, group_size, &slp_impossible,
3034 &inside_cost, &outside_cost, ncopies_for_cost))
3036 /* Create a new SLP instance. */
3037 new_instance = XNEW (struct _slp_instance);
3038 SLP_INSTANCE_TREE (new_instance) = node;
3039 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
3040 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
3041 SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (new_instance) = outside_cost;
3042 SLP_INSTANCE_INSIDE_OF_LOOP_COST (new_instance) = inside_cost;
3043 VEC_safe_push (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo),
3045 if (vect_print_dump_info (REPORT_SLP))
3046 vect_print_slp_tree (node);
3051 /* Failed to SLP. */
3052 /* Free the allocated memory. */
3053 vect_free_slp_tree (node);
3058 /* SLP failed for this instance, but it is still possible to SLP other stmts
3064 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
3065 trees of packed scalar stmts if SLP is possible. */
3068 vect_analyze_slp (loop_vec_info loop_vinfo)
3071 VEC (tree, heap) *strided_stores = LOOP_VINFO_STRIDED_STORES (loop_vinfo);
3074 if (vect_print_dump_info (REPORT_SLP))
3075 fprintf (vect_dump, "=== vect_analyze_slp ===");
/* Try to build one SLP instance per strided-store group; a hard failure
   from vect_analyze_slp_instance means no stmt in the loop can be SLPed.  */
3077 for (i = 0; VEC_iterate (tree, strided_stores, i, store); i++)
3078 if (!vect_analyze_slp_instance (loop_vinfo, store))
3080 /* SLP failed. No instance can be SLPed in the loop. */
3081 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3082 fprintf (vect_dump, "SLP failed.");
3091 /* For each possible SLP instance decide whether to SLP it and calculate overall
3092 unrolling factor needed to SLP the loop. */
3095 vect_make_slp_decision (loop_vec_info loop_vinfo)
3097 unsigned int i, unrolling_factor = 1;
3098 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3099 slp_instance instance;
3100 int decided_to_slp = 0;
3102 if (vect_print_dump_info (REPORT_SLP))
3103 fprintf (vect_dump, "=== vect_make_slp_decision ===");
3105 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3107 /* FORNOW: SLP if you can. */
/* Overall unrolling factor is the maximum over all instances.  */
3108 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
3109 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
3111 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
3112 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
3113 loop-based vectorization. Such stmts will be marked as HYBRID. */
3114 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
3118 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
3120 if (decided_to_slp && vect_print_dump_info (REPORT_SLP))
3121 fprintf (vect_dump, "Decided to SLP %d instances. Unrolling factor %d",
3122 decided_to_slp, unrolling_factor);
3126 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
3127 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
3130 vect_detect_hybrid_slp_stmts (slp_tree node)
3134 imm_use_iterator imm_iter;
/* A pure-SLP stmt whose SSA result is used by a non-SLP stmt must also be
   vectorized loop-based: demote it to HYBRID.  */
3140 for (i = 0; VEC_iterate (tree, SLP_TREE_SCALAR_STMTS (node), i, stmt); i++)
3141 if (PURE_SLP_STMT (vinfo_for_stmt (stmt))
3142 && TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
3143 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, GIMPLE_STMT_OPERAND (stmt, 0))
3144 if (vinfo_for_stmt (use_stmt)
3145 && !STMT_SLP_TYPE (vinfo_for_stmt (use_stmt)))
3146 vect_mark_slp_stmts (node, hybrid, i)
3148 vect_detect_hybrid_slp_stmts (SLP_TREE_LEFT (node));
3149 vect_detect_hybrid_slp_stmts (SLP_TREE_RIGHT (node));
3153 /* Find stmts that must be both vectorized and SLPed. */
3156 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
3159 VEC (slp_instance, heap) *slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3160 slp_instance instance;
3162 if (vect_print_dump_info (REPORT_SLP))
3163 fprintf (vect_dump, "=== vect_detect_hybrid_slp ===");
/* Scan every SLP instance tree for stmts with non-SLP uses.  */
3165 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
3166 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance));
3170 /* Function vect_analyze_data_refs.
3172 Find all the data references in the loop.
3174 The general structure of the analysis of data refs in the vectorizer is as
3176 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
3177 find and analyze all data-refs in the loop and their dependences.
3178 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3179 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3180 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3185 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3187 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3189 VEC (data_reference_p, heap) *datarefs;
3190 struct data_reference *dr;
3193 if (vect_print_dump_info (REPORT_DETAILS))
3194 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
3196 compute_data_dependences_for_loop (loop, true,
3197 &LOOP_VINFO_DATAREFS (loop_vinfo),
3198 &LOOP_VINFO_DDRS (loop_vinfo));
3200 /* Go through the data-refs, check that the analysis succeeded. Update pointer
3201 from stmt_vec_info struct to DR and vectype. */
3202 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3204 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3207 stmt_vec_info stmt_info;
3209 tree base, offset, init;
3211 if (!dr || !DR_REF (dr))
3213 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3214 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
3218 stmt = DR_STMT (dr);
3219 stmt_info = vinfo_for_stmt (stmt);
3221 /* Check that analysis of the data-ref succeeded. */
3222 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
3225 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3227 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
3228 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* A constant base address (absolute address access) is rejected.  */
3233 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3235 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3236 fprintf (vect_dump, "not vectorized: base addr of dr is a "
3241 if (!DR_SYMBOL_TAG (dr))
3243 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3245 fprintf (vect_dump, "not vectorized: no memory tag for ");
3246 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
3251 base = unshare_expr (DR_BASE_ADDRESS (dr));
3252 offset = unshare_expr (DR_OFFSET (dr));
3253 init = unshare_expr (DR_INIT (dr));
3255 /* Update DR field in stmt_vec_info struct. */
3256 bb = bb_for_stmt (stmt);
3258 /* If the dataref is in an inner-loop of the loop that is considered for
3259 for vectorization, we also want to analyze the access relative to
3260 the outer-loop (DR contains information only relative to the
3261 inner-most enclosing loop). We do that by building a reference to the
3262 first location accessed by the inner-loop, and analyze it relative to
3264 if (nested_in_vect_loop_p (loop, stmt))
3266 tree outer_step, outer_base, outer_init;
3267 HOST_WIDE_INT pbitsize, pbitpos;
3269 enum machine_mode pmode;
3270 int punsignedp, pvolatilep;
3271 affine_iv base_iv, offset_iv;
3274 /* Build a reference to the first location accessed by the
3275 inner-loop: *(BASE+INIT). (The first location is actually
3276 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3277 tree inner_base = build_fold_indirect_ref
3278 (fold_build2 (POINTER_PLUS_EXPR,
3279 TREE_TYPE (base), base,
3280 fold_convert (sizetype, init)));
3282 if (vect_print_dump_info (REPORT_DETAILS))
3284 fprintf (dump_file, "analyze in outer-loop: ");
3285 print_generic_expr (dump_file, inner_base, TDF_SLIM);
3288 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
3289 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3290 gcc_assert (outer_base != NULL_TREE);
/* Bit-field-like offsets cannot be expressed in bytes: bail out.  */
3292 if (pbitpos % BITS_PER_UNIT != 0)
3294 if (vect_print_dump_info (REPORT_DETAILS))
3295 fprintf (dump_file, "failed: bit offset alignment.\n");
/* The outer-loop evolution of both the base address and the offset
   must be affine induction variables for the analysis to proceed.  */
3299 outer_base = build_fold_addr_expr (outer_base);
3300 if (!simple_iv (loop, stmt, outer_base, &base_iv, false))
3302 if (vect_print_dump_info (REPORT_DETAILS))
3303 fprintf (dump_file, "failed: evolution of base is not affine.\n");
3310 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, poffset);
3317 offset_iv.base = ssize_int (0);
3318 offset_iv.step = ssize_int (0);
3320 else if (!simple_iv (loop, stmt, poffset, &offset_iv, false))
3322 if (vect_print_dump_info (REPORT_DETAILS))
3323 fprintf (dump_file, "evolution of offset is not affine.\n");
/* Fold the constant parts of base and offset into OUTER_INIT.  */
3327 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3328 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3329 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3330 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3331 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3333 outer_step = size_binop (PLUS_EXPR,
3334 fold_convert (ssizetype, base_iv.step),
3335 fold_convert (ssizetype, offset_iv.step));
/* Record the outer-loop view of the access in the stmt_vec_info.  */
3337 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3338 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3339 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
3340 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
3341 STMT_VINFO_DR_OFFSET (stmt_info) =
3342 fold_convert (ssizetype, offset_iv.base);
3343 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
3344 size_int (highest_pow2_factor (offset_iv.base));
3346 if (dump_file && (dump_flags & TDF_DETAILS))
3348 fprintf (dump_file, "\touter base_address: ");
3349 print_generic_expr (dump_file, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3350 fprintf (dump_file, "\n\touter offset from base address: ");
3351 print_generic_expr (dump_file, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3352 fprintf (dump_file, "\n\touter constant offset from base address: ");
3353 print_generic_expr (dump_file, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3354 fprintf (dump_file, "\n\touter step: ");
3355 print_generic_expr (dump_file, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3356 fprintf (dump_file, "\n\touter aligned to: ");
3357 print_generic_expr (dump_file, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
/* At most one data-ref per stmt is supported.  */
3361 if (STMT_VINFO_DATA_REF (stmt_info))
3363 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3366 "not vectorized: more than one data ref in stmt: ");
3367 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3371 STMT_VINFO_DATA_REF (stmt_info) = dr;
3373 /* Set vectype for STMT. */
3374 scalar_type = TREE_TYPE (DR_REF (dr));
3375 STMT_VINFO_VECTYPE (stmt_info) =
3376 get_vectype_for_scalar_type (scalar_type);
3377 if (!STMT_VINFO_VECTYPE (stmt_info))
3379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3382 "not vectorized: no vectype for stmt: ");
3383 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3384 fprintf (vect_dump, " scalar_type: ");
3385 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3395 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3397 /* Function vect_mark_relevant.
3399 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3402 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
3403 enum vect_relevant relevant, bool live_p)
/* Snapshot the current relevance/liveness so we can tell below whether
   this call actually changed anything; if not, STMT has already been
   processed and must not be pushed onto the worklist again.  */
3405   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3406 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3407 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3409 if (vect_print_dump_info (REPORT_DETAILS))
3410 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
/* If STMT was recognized as the final stmt of a pattern, the marking is
   redirected to the related pattern-stmt, since that is what will
   actually be vectorized.  */
3412 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
3416 /* This is the last stmt in a sequence that was detected as a
3417 pattern that can potentially be vectorized. Don't mark the stmt
3418 as relevant/live because it's not going to be vectorized.
3419 Instead mark the pattern-stmt that replaces it. */
3421 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
3423 if (vect_print_dump_info (REPORT_DETAILS))
3424 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
3425 stmt_info = vinfo_for_stmt (pattern_stmt);
3426 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
3427 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
3428 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
3429 stmt = pattern_stmt;
/* Liveness is sticky (OR-ed in); relevance is only ever raised, never
   lowered, so repeated calls converge monotonically.  */
3432 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
3433 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
3434 STMT_VINFO_RELEVANT (stmt_info) = relevant;
/* Nothing changed: STMT was already at least this relevant/live, so it
   is (or was) on the worklist already — do not re-add it.  */
3436 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
3437 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
3439 if (vect_print_dump_info (REPORT_DETAILS))
3440 fprintf (vect_dump, "already marked relevant/live.");
/* State changed: queue STMT so its operands get (re)examined.  */
3444 VEC_safe_push (tree, heap, *worklist, stmt);
3448 /* Function vect_stmt_relevant_p.
3450 Return true if STMT in loop that is represented by LOOP_VINFO is
3451 "relevant for vectorization".
3453 A stmt is considered "relevant for vectorization" if:
3454 - it has uses outside the loop.
3455 - it has vdefs (it alters memory).
3456 - control stmts in the loop (except for the exit condition).
3458 CHECKME: what other side effects would the vectorizer allow? */
3461 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
3462 enum vect_relevant *relevant, bool *live_p)
3464 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3465 ssa_op_iter op_iter;
3466 imm_use_iterator imm_iter;
3467 use_operand_p use_p;
3468 def_operand_p def_p;
/* Default: not relevant.  NOTE(review): *live_p is read at the end but
   its initialization is not visible in this extract — presumably it is
   cleared here and set when an out-of-loop use is found; confirm against
   the full source.  */
3470   *relevant = vect_unused_in_loop;
3473 /* cond stmt other than loop exit cond. */
3474 if (is_ctrl_stmt (stmt)
3475 && STMT_VINFO_TYPE (vinfo_for_stmt (stmt)) != loop_exit_ctrl_vec_info_type)
3476 *relevant = vect_used_in_loop;
3478 /* changing memory. */
3479 if (TREE_CODE (stmt) != PHI_NODE)
3480 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
3482 if (vect_print_dump_info (REPORT_DETAILS))
3483 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
3484 *relevant = vect_used_in_loop;
3487 /* uses outside the loop. */
3488 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
3490 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
3492 basic_block bb = bb_for_stmt (USE_STMT (use_p));
3493 if (!flow_bb_inside_loop_p (loop, bb))
3495 if (vect_print_dump_info (REPORT_DETAILS))
3496 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
3498 /* We expect all such uses to be in the loop exit phis
3499 (because of loop closed form) */
3500 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
3501 gcc_assert (bb == single_exit (loop)->dest);
/* Relevant if either condition triggered (enum value is nonzero, so the
   boolean-OR works as a flag test).  */
3508   return (*live_p || *relevant);
3513 Function process_use.
3516 - a USE in STMT in a loop represented by LOOP_VINFO
3517 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
3518 that defined USE. This is dont by calling mark_relevant and passing it
3519 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
3522 Generally, LIVE_P and RELEVANT are used to define the liveness and
3523 relevance info of the DEF_STMT of this USE:
3524 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
3525 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
3527 - case 1: If USE is used only for address computations (e.g. array indexing),
3528 which does not need to be directly vectorized, then the liveness/relevance
3529 of the respective DEF_STMT is left unchanged.
3530 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
3531 skip DEF_STMT cause it had already been processed.
3532 - case 3: If DEF_STMT and STMT are in different nests, then "relevant" will
3533 be modified accordingly.
3535 Return true if everything is as expected. Return false otherwise. */
3538 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
3539 enum vect_relevant relevant, VEC(tree,heap) **worklist)
3541 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3542 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3543 stmt_vec_info dstmt_vinfo;
3544 basic_block bb, def_bb;
3546 enum vect_def_type dt;
3548 /* case 1: we are only interested in uses that need to be vectorized. Uses
3549 that are used for address computation are not considered relevant. */
3550 if (!exist_non_indexing_operands_for_use_p (use, stmt))
/* A use the analyzer cannot classify makes the whole loop
   unvectorizable — propagate failure to the caller.  */
3553   if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
3555 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
3556 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
/* Constants/invariants have no defining stmt inside the loop; nothing
   to mark.  Likewise if the def is outside the loop entirely.  */
3560   if (!def_stmt || IS_EMPTY_STMT (def_stmt))
3563 def_bb = bb_for_stmt (def_stmt);
3564 if (!flow_bb_inside_loop_p (loop, def_bb))
3566 if (vect_print_dump_info (REPORT_DETAILS))
3567 fprintf (vect_dump, "def_stmt is out of loop.");
3571 /* case 2: A reduction phi (STMT) defined by a reduction stmt (DEF_STMT).
3572 DEF_STMT must have already been processed, because this should be the
3573 only way that STMT, which is a reduction-phi, was put in the worklist,
3574 as there should be no other uses for DEF_STMT in the loop. So we just
3575 check that everything is as expected, and we are done. */
3576 dstmt_vinfo = vinfo_for_stmt (def_stmt);
3577 bb = bb_for_stmt (stmt);
3578 if (TREE_CODE (stmt) == PHI_NODE
3579 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
3580 && TREE_CODE (def_stmt) != PHI_NODE
3581 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def
3582 && bb->loop_father == def_bb->loop_father)
3584 if (vect_print_dump_info (REPORT_DETAILS))
3585 fprintf (vect_dump, "reduc-stmt defining reduc-phi in the same nest.");
/* If the reduction stmt was replaced by a pattern stmt, the recorded
   relevance lives on the pattern stmt's vinfo.  */
3586   if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
3587 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
3588 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
3589 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
3590 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
3594 /* case 3a: outer-loop stmt defining an inner-loop stmt:
3595 outer-loop-header-bb:
/* Translate the outer-loop relevance of STMT into the relevance the
   outer-loop DEF_STMT should get.  NOTE(review): the switch header and
   break statements are not visible in this extract; the case labels
   below imply a switch on `relevant`.  */
3601   if (flow_loop_nested_p (def_bb->loop_father, bb->loop_father))
3603 if (vect_print_dump_info (REPORT_DETAILS))
3604 fprintf (vect_dump, "outer-loop def-stmt defining inner-loop stmt.");
3607 case vect_unused_in_loop:
3608 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3609 vect_used_by_reduction : vect_unused_in_loop;
3611 case vect_used_in_outer_by_reduction:
3612 relevant = vect_used_by_reduction;
3614 case vect_used_in_outer:
3615 relevant = vect_used_in_loop;
3617 case vect_used_by_reduction:
3618 case vect_used_in_loop:
3626 /* case 3b: inner-loop stmt defining an outer-loop stmt:
3627 outer-loop-header-bb:
3633 else if (flow_loop_nested_p (bb->loop_father, def_bb->loop_father))
3635 if (vect_print_dump_info (REPORT_DETAILS))
3636 fprintf (vect_dump, "inner-loop def-stmt defining outer-loop stmt.");
3639 case vect_unused_in_loop:
3640 relevant = (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def) ?
3641 vect_used_in_outer_by_reduction : vect_unused_in_loop;
3644 case vect_used_in_outer_by_reduction:
3645 case vect_used_in_outer:
3648 case vect_used_by_reduction:
3649 relevant = vect_used_in_outer_by_reduction;
3652 case vect_used_in_loop:
3653 relevant = vect_used_in_outer;
/* Finally propagate the (possibly adjusted) relevance/liveness to the
   defining stmt and queue it for processing.  */
3661   vect_mark_relevant (worklist, def_stmt, relevant, live_p);
3666 /* Function vect_mark_stmts_to_be_vectorized.
3668 Not all stmts in the loop need to be vectorized. For example:
3677 Stmt 1 and 3 do not need to be vectorized, because loop control and
3678 addressing of vectorized data-refs are handled differently.
3680 This pass detects such stmts. */
3683 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3685 VEC(tree,heap) *worklist;
3686 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3687 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3688 unsigned int nbbs = loop->num_nodes;
3689 block_stmt_iterator si;
3693 stmt_vec_info stmt_vinfo;
3697 enum vect_relevant relevant;
3699 if (vect_print_dump_info (REPORT_DETAILS))
3700 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
3702 worklist = VEC_alloc (tree, heap, 64);
3704 /* 1. Init worklist. */
/* Seed the worklist with every phi and stmt in the loop that is
   inherently relevant (see vect_stmt_relevant_p).  */
3705 for (i = 0; i < nbbs; i++)
3708 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3710 if (vect_print_dump_info (REPORT_DETAILS))
3712 fprintf (vect_dump, "init: phi relevant? ");
3713 print_generic_expr (vect_dump, phi, TDF_SLIM);
3716 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
3717 vect_mark_relevant (&worklist, phi, relevant, live_p);
3719 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3721 stmt = bsi_stmt (si);
3722 if (vect_print_dump_info (REPORT_DETAILS))
3724 fprintf (vect_dump, "init: stmt relevant? ");
3725 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3728 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
3729 vect_mark_relevant (&worklist, stmt, relevant, live_p);
3733 /* 2. Process_worklist */
/* Fixed-point propagation: pop a stmt, propagate its relevance/liveness
   backwards to the stmts defining its operands (process_use pushes any
   newly-marked def-stmt back onto the worklist).  */
3734 while (VEC_length (tree, worklist) > 0)
3736 use_operand_p use_p;
3739 stmt = VEC_pop (tree, worklist);
3740 if (vect_print_dump_info (REPORT_DETAILS))
3742 fprintf (vect_dump, "worklist: examine stmt: ");
3743 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3746 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
3747 (DEF_STMT) as relevant/irrelevant and live/dead according to the
3748 liveness and relevance properties of STMT. */
3749 ann = stmt_ann (stmt);
3750 stmt_vinfo = vinfo_for_stmt (stmt);
3751 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
3752 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
3754 /* Generally, the liveness and relevance properties of STMT are
3755 propagated as is to the DEF_STMTs of its USEs:
3756 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
3757 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
3759 One exception is when STMT has been identified as defining a reduction
3760 variable; in this case we set the liveness/relevance as follows:
3762 relevant = vect_used_by_reduction
3763 This is because we distinguish between two kinds of relevant stmts -
3764 those that are used by a reduction computation, and those that are
3765 (also) used by a regular computation. This allows us later on to
3766 identify stmts that are used solely by a reduction, and therefore the
3767 order of the results that they produce does not have to be kept.
3769 Reduction phis are expected to be used by a reduction stmt, or by
3770 in an outer loop; Other reduction stmts are expected to be
3771 in the loop, and possibly used by a stmt in an outer loop.
3772 Here are the expected values of "relevant" for reduction phis/stmts:
3775 vect_unused_in_loop ok
3776 vect_used_in_outer_by_reduction ok ok
3777 vect_used_in_outer ok ok
3778 vect_used_by_reduction ok
3779 vect_used_in_loop */
3781 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
3783 enum vect_relevant tmp_relevant = relevant;
/* Validate `relevant` against the table above; anything outside it is
   an unsupported use of the reduction and aborts the analysis.
   NOTE(review): the fall-through structure between cases relies on
   break statements not visible in this extract.  */
3784     switch (tmp_relevant)
3786 case vect_unused_in_loop:
3787 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
3788 relevant = vect_used_by_reduction;
3791 case vect_used_in_outer_by_reduction:
3792 case vect_used_in_outer:
3793 gcc_assert (TREE_CODE (stmt) != WIDEN_SUM_EXPR
3794 && TREE_CODE (stmt) != DOT_PROD_EXPR);
3797 case vect_used_by_reduction:
3798 if (TREE_CODE (stmt) == PHI_NODE)
3801 case vect_used_in_loop:
3803 if (vect_print_dump_info (REPORT_DETAILS))
3804 fprintf (vect_dump, "unsupported use of reduction.");
3805 VEC_free (tree, heap, worklist);
/* Propagate to each operand's defining stmt; a failure from
   process_use means the loop cannot be vectorized.  */
3811     FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
3813 tree op = USE_FROM_PTR (use_p);
3814 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
3816 VEC_free (tree, heap, worklist);
3820 } /* while worklist */
3822 VEC_free (tree, heap, worklist);
3827 /* Function vect_can_advance_ivs_p
3829 In case the number of iterations that LOOP iterates is unknown at compile
3830 time, an epilog loop will be generated, and the loop induction variables
3831 (IVs) will be "advanced" to the value they are supposed to take just before
3832 the epilog loop. Here we check that the access function of the loop IVs
3833 and the expression that represents the loop bound are simple enough.
3834 These restrictions will be relaxed in the future. */
3837 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
3839 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3840 basic_block bb = loop->header;
3843 /* Analyze phi functions of the loop header. */
3845 if (vect_print_dump_info (REPORT_DETAILS))
3846 fprintf (vect_dump, "vect_can_advance_ivs_p:");
/* Every non-virtual, non-reduction header phi must have an affine
   (degree-1) evolution in this loop; otherwise we cannot compute the
   value to advance the IV to for the epilog loop.  */
3848 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3850 tree access_fn = NULL;
3851 tree evolution_part;
3853 if (vect_print_dump_info (REPORT_DETAILS))
3855 fprintf (vect_dump, "Analyze phi: ");
3856 print_generic_expr (vect_dump, phi, TDF_SLIM);
3859 /* Skip virtual phi's. The data dependences that are associated with
3860 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3862 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3864 if (vect_print_dump_info (REPORT_DETAILS))
3865 fprintf (vect_dump, "virtual phi. skip.");
3869 /* Skip reduction phis. */
3871 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
3873 if (vect_print_dump_info (REPORT_DETAILS))
3874 fprintf (vect_dump, "reduc phi. skip.");
3878 /* Analyze the evolution function. */
3880 access_fn = instantiate_parameters
3881 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
/* No access function at all: cannot advance this IV.  NOTE(review): the
   `if (!access_fn)` guard expected before this dump is among the lines
   missing from this extract.  */
3885     if (vect_print_dump_info (REPORT_DETAILS))
3886 fprintf (vect_dump, "No Access function.");
3890 if (vect_print_dump_info (REPORT_DETAILS))
3892 fprintf (vect_dump, "Access function of PHI: ");
3893 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* Extract the step of the evolution in this loop; NULL means the scalar
   evolution analyzer could not determine one.  */
3896     evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
3898 if (evolution_part == NULL_TREE)
3900 if (vect_print_dump_info (REPORT_DETAILS))
3901 fprintf (vect_dump, "No evolution.");
3905 /* FORNOW: We do not transform initial conditions of IVs
3906 which evolution functions are a polynomial of degree >= 2. */
3908 if (tree_is_chrec (evolution_part))
3916 /* Function vect_get_loop_niters.
3918 Determine how many iterations the loop is executed.
3919 If an expression that represents the number of iterations
3920 can be constructed, place it in NUMBER_OF_ITERATIONS.
3921 Return the loop exit condition. */
3924 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
3928 if (vect_print_dump_info (REPORT_DETAILS))
3929 fprintf (vect_dump, "=== get_loop_niters ===");
/* Ask the niter analyzer how many times the exit condition runs; store
   the expression only when the analysis produced a usable answer
   (neither unknown nor chrec_dont_know).  */
3931 niters = number_of_exit_cond_executions (loop);
3933 if (niters != NULL_TREE
3934 && niters != chrec_dont_know)
3936 *number_of_iterations = niters;
3938 if (vect_print_dump_info (REPORT_DETAILS))
3940 fprintf (vect_dump, "==> get_loop_niters:" );
3941 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
/* Return the COND_EXPR controlling the (single) loop exit; the caller
   records it so it is not treated as an ordinary stmt.  */
3945   return get_loop_exit_condition (loop);
3949 /* Function vect_analyze_loop_1.
3951 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3952 for it. The different analyses will record information in the
3953 loop_vec_info struct. This is a subset of the analyses applied in
3954 vect_analyze_loop, to be applied on an inner-loop nested in the loop
3955 that is now considered for (outer-loop) vectorization. */
3957 static loop_vec_info
3958 vect_analyze_loop_1 (struct loop *loop)
3960 loop_vec_info loop_vinfo;
3962 if (vect_print_dump_info (REPORT_DETAILS))
3963 fprintf (vect_dump, "===== analyze_loop_nest_1 =====");
3965 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
/* Only the loop-form check is run for an inner loop; a NULL result means
   the enclosing outer-loop cannot be vectorized either.  */
3967 loop_vinfo = vect_analyze_loop_form (loop);
3970 if (vect_print_dump_info (REPORT_DETAILS))
3971 fprintf (vect_dump, "bad inner-loop form.");
3979 /* Function vect_analyze_loop_form.
3981 Verify that certain CFG restrictions hold, including:
3982 - the loop has a pre-header
3983 - the loop has a single entry and exit
3984 - the loop exit condition is simple enough, and the number of iterations
3985 can be analyzed (a countable loop). */
3988 vect_analyze_loop_form (struct loop *loop)
3990 loop_vec_info loop_vinfo;
3992 tree number_of_iterations = NULL;
3993 loop_vec_info inner_loop_vinfo = NULL;
3995 if (vect_print_dump_info (REPORT_DETAILS))
3996 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
3998 /* Different restrictions apply when we are considering an inner-most loop,
3999 vs. an outer (nested) loop.
4000 (FORNOW. May want to relax some of these restrictions in the future). */
4004 /* Inner-most loop. We currently require that the number of BBs is
4005 exactly 2 (the header and latch). Vectorizable inner-most loops
4016 if (loop->num_nodes != 2)
4018 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4019 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4023 if (empty_block_p (loop->header))
4025 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4026 fprintf (vect_dump, "not vectorized: empty loop.");
/* Else-branch: LOOP has an inner loop, so we are looking at a candidate
   for outer-loop vectorization.  */
4032     struct loop *innerloop = loop->inner;
4033 edge backedge, entryedge;
4035 /* Nested loop. We currently require that the loop is doubly-nested,
4036 contains a single inner loop, and the number of BBs is exactly 5.
4037 Vectorizable outer-loops look like this:
4049 The inner-loop has the properties expected of inner-most loops
4050 as described above. */
4052 if ((loop->inner)->inner || (loop->inner)->next)
4054 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4055 fprintf (vect_dump, "not vectorized: multiple nested loops.");
4059 /* Analyze the inner-loop. */
4060 inner_loop_vinfo = vect_analyze_loop_1 (loop->inner);
4061 if (!inner_loop_vinfo)
4063 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4064 fprintf (vect_dump, "not vectorized: Bad inner loop.");
/* The inner loop's iteration count must not vary across outer-loop
   iterations, or the outer loop is not countable as a whole.  */
4068     if (!expr_invariant_in_loop_p (loop,
4069 LOOP_VINFO_NITERS (inner_loop_vinfo)))
4071 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4073 "not vectorized: inner-loop count not invariant.");
4074 destroy_loop_vec_info (inner_loop_vinfo, true);
4078 if (loop->num_nodes != 5)
4080 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4081 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
4082 destroy_loop_vec_info (inner_loop_vinfo, true);
/* Identify which of the inner header's two predecessors is the entry
   edge (from the outer header) and which is the backedge (from the
   inner latch).  */
4086     gcc_assert (EDGE_COUNT (innerloop->header->preds) == 2);
4087 backedge = EDGE_PRED (innerloop->header, 1);
4088 entryedge = EDGE_PRED (innerloop->header, 0);
4089 if (EDGE_PRED (innerloop->header, 0)->src == innerloop->latch)
4091 backedge = EDGE_PRED (innerloop->header, 0);
4092 entryedge = EDGE_PRED (innerloop->header, 1);
/* Require the canonical outer-loop shape: entry from the outer header,
   a single inner-loop exit, and that exit feeding the outer latch's
   predecessor block.  */
4095     if (entryedge->src != loop->header
4096 || !single_exit (innerloop)
4097 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
4099 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4100 fprintf (vect_dump, "not vectorized: unsupported outerloop form.");
4101 destroy_loop_vec_info (inner_loop_vinfo, true);
4105 if (vect_print_dump_info (REPORT_DETAILS))
4106 fprintf (vect_dump, "Considering outer-loop vectorization.");
/* Checks from here on apply to both the inner-most and the outer-loop
   case.  On every failure path any inner_loop_vinfo allocated above is
   destroyed before returning.  */
4109   if (!single_exit (loop)
4110 || EDGE_COUNT (loop->header->preds) != 2)
4112 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4114 if (!single_exit (loop))
4115 fprintf (vect_dump, "not vectorized: multiple exits.");
4116 else if (EDGE_COUNT (loop->header->preds) != 2)
4117 fprintf (vect_dump, "not vectorized: too many incoming edges.");
4119 if (inner_loop_vinfo)
4120 destroy_loop_vec_info (inner_loop_vinfo, true);
4124 /* We assume that the loop exit condition is at the end of the loop. i.e,
4125 that the loop is represented as a do-while (with a proper if-guard
4126 before the loop if needed), where the loop header contains all the
4127 executable statements, and the latch is empty. */
4128 if (!empty_block_p (loop->latch)
4129 || phi_nodes (loop->latch))
4131 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4132 fprintf (vect_dump, "not vectorized: unexpected loop form.");
4133 if (inner_loop_vinfo)
4134 destroy_loop_vec_info (inner_loop_vinfo, true);
4138 /* Make sure there exists a single-predecessor exit bb: */
4139 if (!single_pred_p (single_exit (loop)->dest))
4141 edge e = single_exit (loop);
4142 if (!(e->flags & EDGE_ABNORMAL))
/* Normal exit edge: splitting it creates the required
   single-predecessor exit block, so this case is recoverable.  */
4144         split_loop_exit_edge (e);
4145 if (vect_print_dump_info (REPORT_DETAILS))
4146 fprintf (vect_dump, "split exit edge.");
4150 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4151 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
4152 if (inner_loop_vinfo)
4153 destroy_loop_vec_info (inner_loop_vinfo, true);
/* Determine the iteration count and the controlling exit condition.  */
4158   loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
4161 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4162 fprintf (vect_dump, "not vectorized: complicated exit condition.");
4163 if (inner_loop_vinfo)
4164 destroy_loop_vec_info (inner_loop_vinfo, true);
4168 if (!number_of_iterations)
4170 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4172 "not vectorized: number of iterations cannot be computed.");
4173 if (inner_loop_vinfo)
4174 destroy_loop_vec_info (inner_loop_vinfo, true);
4178 if (chrec_contains_undetermined (number_of_iterations))
4180 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
4181 fprintf (vect_dump, "Infinite number of iterations.");
4182 if (inner_loop_vinfo)
4183 destroy_loop_vec_info (inner_loop_vinfo, true);
4187 if (!NITERS_KNOWN_P (number_of_iterations))
4189 if (vect_print_dump_info (REPORT_DETAILS))
4191 fprintf (vect_dump, "Symbolic number of iterations is ");
4192 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
4195 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
4197 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
4198 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
4199 if (inner_loop_vinfo)
4200 destroy_loop_vec_info (inner_loop_vinfo, false);
/* All form checks passed: build the loop_vec_info, record the iteration
   count (twice — NITERS may later be updated by peeling, while
   NITERS_UNCHANGED keeps the original), and tag the exit condition so it
   is skipped by stmt analysis.  */
4204   loop_vinfo = new_loop_vec_info (loop);
4205 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
4206 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
4208 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type;
4210 /* CHECKME: May want to keep it around it in the future. */
4211 if (inner_loop_vinfo)
4212 destroy_loop_vec_info (inner_loop_vinfo, false);
4214 gcc_assert (!loop->aux);
4215 loop->aux = loop_vinfo;
4220 /* Function vect_analyze_loop.
4222 Apply a set of analyses on LOOP, and create a loop_vec_info struct
4223 for it. The different analyses will record information in the
4224 loop_vec_info struct. */
4226 vect_analyze_loop (struct loop *loop)
4229 loop_vec_info loop_vinfo;
4231 if (vect_print_dump_info (REPORT_DETAILS))
4232 fprintf (vect_dump, "===== analyze_loop_nest =====");
4234 if (loop_outer (loop)
4235 && loop_vec_info_for_loop (loop_outer (loop))
4236 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
4238 if (vect_print_dump_info (REPORT_DETAILS))
4239 fprintf (vect_dump, "outer-loop already vectorized.");
4243 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
4245 loop_vinfo = vect_analyze_loop_form (loop);
4248 if (vect_print_dump_info (REPORT_DETAILS))
4249 fprintf (vect_dump, "bad loop form.");
4253 /* Find all data references in the loop (which correspond to vdefs/vuses)
4254 and analyze their evolution in the loop.
4256 FORNOW: Handle only simple, array references, which
4257 alignment can be forced, and aligned pointer-references. */
4259 ok = vect_analyze_data_refs (loop_vinfo);
4262 if (vect_print_dump_info (REPORT_DETAILS))
4263 fprintf (vect_dump, "bad data references.");
4264 destroy_loop_vec_info (loop_vinfo, true);
4268 /* Classify all cross-iteration scalar data-flow cycles.
4269 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4271 vect_analyze_scalar_cycles (loop_vinfo);
4273 vect_pattern_recog (loop_vinfo);
4275 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4277 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4280 if (vect_print_dump_info (REPORT_DETAILS))
4281 fprintf (vect_dump, "unexpected pattern.");
4282 destroy_loop_vec_info (loop_vinfo, true);
4286 /* Analyze the alignment of the data-refs in the loop.
4287 Fail if a data reference is found that cannot be vectorized. */
4289 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4292 if (vect_print_dump_info (REPORT_DETAILS))
4293 fprintf (vect_dump, "bad data alignment.");
4294 destroy_loop_vec_info (loop_vinfo, true);
4298 ok = vect_determine_vectorization_factor (loop_vinfo);
4301 if (vect_print_dump_info (REPORT_DETAILS))
4302 fprintf (vect_dump, "can't determine vectorization factor.");
4303 destroy_loop_vec_info (loop_vinfo, true);
4307 /* Analyze data dependences between the data-refs in the loop.
4308 FORNOW: fail at the first data dependence that we encounter. */
4310 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4313 if (vect_print_dump_info (REPORT_DETAILS))
4314 fprintf (vect_dump, "bad data dependence.");
4315 destroy_loop_vec_info (loop_vinfo, true);
4319 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4320 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4322 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4325 if (vect_print_dump_info (REPORT_DETAILS))
4326 fprintf (vect_dump, "bad data access.");
4327 destroy_loop_vec_info (loop_vinfo, true);
4331 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
4332 It is important to call pruning after vect_analyze_data_ref_accesses,
4333 since we use grouping information gathered by interleaving analysis. */
4334 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
4337 if (vect_print_dump_info (REPORT_DETAILS))
4338 fprintf (vect_dump, "too long list of versioning for alias "
4340 destroy_loop_vec_info (loop_vinfo, true);
4344 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
4345 ok = vect_analyze_slp (loop_vinfo);
4348 /* Decide which possible SLP instances to SLP. */
4349 vect_make_slp_decision (loop_vinfo);
4351 /* Find stmts that need to be both vectorized and SLPed. */
4352 vect_detect_hybrid_slp (loop_vinfo);
4355 /* This pass will decide on using loop versioning and/or loop peeling in
4356 order to enhance the alignment of data references in the loop. */
4358 ok = vect_enhance_data_refs_alignment (loop_vinfo);
4361 if (vect_print_dump_info (REPORT_DETAILS))
4362 fprintf (vect_dump, "bad data alignment.");
4363 destroy_loop_vec_info (loop_vinfo, true);
4367 /* Scan all the operations in the loop and make sure they are
4370 ok = vect_analyze_operations (loop_vinfo);
4373 if (vect_print_dump_info (REPORT_DETAILS))
4374 fprintf (vect_dump, "bad operation or unsupported loop bound.");
4375 destroy_loop_vec_info (loop_vinfo, true);
4379 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;