1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
/* NOTE(review): this listing drops original source lines (the in-line
   numbers jump); tokens below are preserved verbatim.  */
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
/* Helpers used by the analysis passes above; operate on individual
   data references or dependence relations rather than the whole loop.  */
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
/* NOTE(review): interior lines of this function are missing from the
   listing (braces, return statements, some fprintf prefixes); the visible
   tokens are kept byte-identical.  Walks every phi and every statement of
   the loop body, computes a vector type for each relevant one, and records
   the largest nunits seen as the loop's vectorization factor.  */
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
104 stmt_vec_info stmt_info;
107 if (vect_print_dump_info (REPORT_DETAILS))
108 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
/* First pass over each block: the loop-header phis.  */
110 for (i = 0; i < nbbs; i++)
112 basic_block bb = bbs[i];
114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
116 stmt_info = vinfo_for_stmt (phi);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining phi: ");
120 print_generic_expr (vect_dump, phi, TDF_SLIM);
123 gcc_assert (stmt_info);
125 /* Two cases of "relevant" phis: those that define an
126 induction that is used in the loop, and those that
127 define a reduction. */
128 if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
129 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
130 || (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
131 && STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def))
133 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
134 scalar_type = TREE_TYPE (PHI_RESULT (phi));
136 if (vect_print_dump_info (REPORT_DETAILS))
138 fprintf (vect_dump, "get vectype for scalar type: ");
139 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
/* Failure path (unsupported scalar type) presumably follows here in
   the omitted lines — TODO confirm against full source.  */
142 vectype = get_vectype_for_scalar_type (scalar_type);
145 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
148 "not vectorized: unsupported data-type ");
149 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
153 STMT_VINFO_VECTYPE (stmt_info) = vectype;
155 if (vect_print_dump_info (REPORT_DETAILS))
157 fprintf (vect_dump, "vectype: ");
158 print_generic_expr (vect_dump, vectype, TDF_SLIM);
/* VF is the maximum nunits over all vector types in the loop.  */
161 nunits = TYPE_VECTOR_SUBPARTS (vectype);
162 if (vect_print_dump_info (REPORT_DETAILS))
163 fprintf (vect_dump, "nunits = %d", nunits);
165 if (!vectorization_factor
166 || (nunits > vectorization_factor))
167 vectorization_factor = nunits;
/* Second pass: the ordinary statements of the block.  */
171 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
173 tree stmt = bsi_stmt (si);
174 stmt_info = vinfo_for_stmt (stmt);
176 if (vect_print_dump_info (REPORT_DETAILS))
178 fprintf (vect_dump, "==> examining statement: ");
179 print_generic_expr (vect_dump, stmt, TDF_SLIM);
182 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
185 gcc_assert (stmt_info);
187 /* skip stmts which do not need to be vectorized. */
188 if (!STMT_VINFO_RELEVANT_P (stmt_info)
189 && !STMT_VINFO_LIVE_P (stmt_info))
191 if (vect_print_dump_info (REPORT_DETAILS))
192 fprintf (vect_dump, "skip.");
/* Statements already written in vector form cannot be handled.  */
196 if (!GIMPLE_STMT_P (stmt)
197 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
199 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
201 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
202 print_generic_expr (vect_dump, stmt, TDF_SLIM);
207 if (STMT_VINFO_VECTYPE (stmt_info))
209 /* The only case when a vectype had been already set is for stmts
210 that contain a dataref, or for "pattern-stmts" (stmts generated
211 by the vectorizer to represent/replace a certain idiom). */
212 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
213 || is_pattern_stmt_p (stmt_info))
214 vectype = STMT_VINFO_VECTYPE (stmt_info);
218 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
219 && !is_pattern_stmt_p (stmt_info));
221 /* We set the vectype according to the type of the result (lhs).
222 For stmts whose result-type is different than the type of the
223 arguments (e.g. demotion, promotion), vectype will be reset
224 appropriately (later). Note that we have to visit the smallest
225 datatype in this function, because that determines the VF.
226 If the smallest datatype in the loop is present only as the
227 rhs of a promotion operation - we'd miss it here.
228 However, in such a case, that a variable of this datatype
229 does not appear in the lhs anywhere in the loop, it shouldn't
230 affect the vectorization factor. */
231 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
233 if (vect_print_dump_info (REPORT_DETAILS))
235 fprintf (vect_dump, "get vectype for scalar type: ");
236 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
239 vectype = get_vectype_for_scalar_type (scalar_type);
242 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
245 "not vectorized: unsupported data-type ");
246 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
250 STMT_VINFO_VECTYPE (stmt_info) = vectype;
253 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "vectype: ");
256 print_generic_expr (vect_dump, vectype, TDF_SLIM);
259 nunits = TYPE_VECTOR_SUBPARTS (vectype);
260 if (vect_print_dump_info (REPORT_DETAILS))
261 fprintf (vect_dump, "nunits = %d", nunits);
263 if (!vectorization_factor
264 || (nunits > vectorization_factor))
265 vectorization_factor = nunits;
270 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* A VF of 1 means no data element fit more than once in a vector:
   nothing to gain from vectorizing.  */
272 if (vectorization_factor <= 1)
274 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
275 fprintf (vect_dump, "not vectorized: unsupported data-type");
278 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
/* NOTE(review): the listing omits interior lines of this function
   (braces, returns, fprintf prefixes); visible tokens kept verbatim.
   Verifies every relevant statement in the loop is vectorizable, then
   applies profitability / epilog-feasibility checks.  */
284 /* Function vect_analyze_operations.
286 Scan the loop stmts and make sure they are all vectorizable. */
289 vect_analyze_operations (loop_vec_info loop_vinfo)
291 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
292 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
293 int nbbs = loop->num_nodes;
294 block_stmt_iterator si;
295 unsigned int vectorization_factor = 0;
299 stmt_vec_info stmt_info;
/* Set if at least one relevant stmt exists; otherwise the loop is
   rejected as redundant below.  */
300 bool need_to_vectorize = false;
302 if (vect_print_dump_info (REPORT_DETAILS))
303 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* VF must already have been computed by vect_determine_vectorization_factor.  */
305 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
306 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
308 for (i = 0; i < nbbs; i++)
310 basic_block bb = bbs[i];
312 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
314 stmt_info = vinfo_for_stmt (phi);
315 if (vect_print_dump_info (REPORT_DETAILS))
317 fprintf (vect_dump, "examining phi: ");
318 print_generic_expr (vect_dump, phi, TDF_SLIM);
321 gcc_assert (stmt_info);
323 if (STMT_VINFO_LIVE_P (stmt_info))
325 /* FORNOW: not yet supported. */
326 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
327 fprintf (vect_dump, "not vectorized: value used after loop.");
331 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
332 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
334 /* Most likely a reduction-like computation that is used
336 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
337 fprintf (vect_dump, "not vectorized: unsupported pattern.");
341 if (STMT_VINFO_RELEVANT_P (stmt_info))
342 need_to_vectorize = true;
345 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
347 tree stmt = bsi_stmt (si);
348 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
349 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
351 if (vect_print_dump_info (REPORT_DETAILS))
353 fprintf (vect_dump, "==> examining statement: ");
354 print_generic_expr (vect_dump, stmt, TDF_SLIM);
357 gcc_assert (stmt_info);
359 /* skip stmts which do not need to be vectorized.
360 this is expected to include:
361 - the COND_EXPR which is the loop exit condition
362 - any LABEL_EXPRs in the loop
363 - computations that are used only for array indexing or loop
366 if (!STMT_VINFO_RELEVANT_P (stmt_info)
367 && !STMT_VINFO_LIVE_P (stmt_info))
369 if (vect_print_dump_info (REPORT_DETAILS))
370 fprintf (vect_dump, "irrelevant.");
/* Sanity-check the relevance/def-type combination.  */
374 switch (STMT_VINFO_DEF_TYPE (stmt_info))
379 case vect_reduction_def:
380 gcc_assert (relevance == vect_unused_in_loop);
383 case vect_induction_def:
384 case vect_constant_def:
385 case vect_invariant_def:
386 case vect_unknown_def_type:
391 if (STMT_VINFO_RELEVANT_P (stmt_info))
393 gcc_assert (GIMPLE_STMT_P (stmt)
394 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
395 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
396 need_to_vectorize = true;
/* A stmt is vectorizable if at least one of the per-kind analyzers
   accepts it (analysis mode: NULL vec_stmt arguments).  */
399 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
400 || vectorizable_type_demotion (stmt, NULL, NULL)
401 || vectorizable_conversion (stmt, NULL, NULL)
402 || vectorizable_operation (stmt, NULL, NULL)
403 || vectorizable_assignment (stmt, NULL, NULL)
404 || vectorizable_load (stmt, NULL, NULL)
405 || vectorizable_call (stmt, NULL, NULL)
406 || vectorizable_store (stmt, NULL, NULL)
407 || vectorizable_condition (stmt, NULL, NULL)
408 || vectorizable_reduction (stmt, NULL, NULL));
410 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
411 need extra handling, except for vectorizable reductions. */
412 if (STMT_VINFO_LIVE_P (stmt_info)
413 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
414 ok |= vectorizable_live_operation (stmt, NULL, NULL);
418 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
420 fprintf (vect_dump, "not vectorized: stmt not supported: ");
421 print_generic_expr (vect_dump, stmt, TDF_SLIM);
428 /* TODO: Analyze cost. Decide if worth while to vectorize. */
430 /* All operations in the loop are either irrelevant (deal with loop
431 control, or dead), or only used outside the loop and can be moved
432 out of the loop (e.g. invariants, inductions). The loop can be
433 optimized away by scalar optimizations. We're better off not
434 touching this loop. */
435 if (!need_to_vectorize)
437 if (vect_print_dump_info (REPORT_DETAILS))
439 "All the computation can be taken out of the loop.")
440 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
442 "not vectorized: redundant loop. no profit to vectorize.");
446 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
447 && vect_print_dump_info (REPORT_DETAILS))
449 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
450 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Reject loops whose known trip count is too small relative to VF
   (tunable via PARAM_MIN_VECT_LOOP_BOUND).  */
452 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
453 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
454 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
455 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
456 * vectorization_factor))))
458 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
459 fprintf (vect_dump, "not vectorized: iteration count too small.");
/* An epilog loop is needed when niters is unknown, not a multiple of
   VF, or when peeling for alignment was requested.  */
463 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
464 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
465 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
467 if (vect_print_dump_info (REPORT_DETAILS))
468 fprintf (vect_dump, "epilog loop required.");
469 if (!vect_can_advance_ivs_p (loop_vinfo))
471 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
473 "not vectorized: can't create epilog loop 1.");
476 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
478 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
480 "not vectorized: can't create epilog loop 2.");
/* NOTE(review): body truncated in this listing (returns/braces omitted);
   visible tokens kept verbatim.  */
489 /* Function exist_non_indexing_operands_for_use_p
491 USE is one of the uses attached to STMT. Check if USE is
492 used in STMT for anything other than indexing an array. */
495 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
498 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
500 /* USE corresponds to some operand in STMT. If there is no data
501 reference in STMT, then any operand that corresponds to USE
502 is not indexing an array. */
503 if (!STMT_VINFO_DATA_REF (stmt_info))
506 /* STMT has a data_ref. FORNOW this means that its of one of
510 (This should have been verified in analyze_data_refs).
512 'var' in the second case corresponds to a def, not a use,
513 so USE cannot correspond to any operands that are not used
516 Therefore, all we need to check is if STMT falls into the
517 first case, and whether var corresponds to USE. */
/* If the lhs is an SSA_NAME this is the "var = ARRAY_REF" form; the
   rhs operand is then examined against USE.  */
519 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
522 operand = GIMPLE_STMT_OPERAND (stmt, 1);
524 if (TREE_CODE (operand) != SSA_NAME)
/* NOTE(review): interior lines omitted by this listing; visible tokens
   kept verbatim.  Two phases over the loop-header phis: first classify
   inductions via scalar evolution, then classify the remaining
   (worklisted) phis as reductions or unknown cycles.  */
534 /* Function vect_analyze_scalar_cycles.
536 Examine the cross iteration def-use cycles of scalar variables, by
537 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
538 following: invariant, induction, reduction, unknown.
540 Some forms of scalar cycles are not yet supported.
542 Example1: reduction: (unsupported yet)
548 Example2: induction: (unsupported yet)
554 Note: the following loop *is* vectorizable:
560 even though it has a def-use cycle caused by the induction variable i:
562 loop: i_2 = PHI (i_0, i_1)
567 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
568 it does not need to be vectorized because it is only used for array
569 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
570 loop2 on the other hand is relevant (it is being written to memory).
574 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
577 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
578 basic_block bb = loop->header;
/* Phis not recognized as inductions are queued here for the
   reduction-detection pass below.  */
580 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
582 if (vect_print_dump_info (REPORT_DETAILS))
583 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
585 /* First - identify all inductions. */
586 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
588 tree access_fn = NULL;
589 tree def = PHI_RESULT (phi);
590 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
592 if (vect_print_dump_info (REPORT_DETAILS))
594 fprintf (vect_dump, "Analyze phi: ");
595 print_generic_expr (vect_dump, phi, TDF_SLIM);
598 /* Skip virtual phi's. The data dependences that are associated with
599 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
600 if (!is_gimple_reg (SSA_NAME_VAR (def)))
603 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
605 /* Analyze the evolution function. */
606 access_fn = analyze_scalar_evolution (loop, def);
607 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
609 fprintf (vect_dump, "Access function of PHI: ");
610 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* Not a simple IV evolution: defer to the reduction pass.  */
614 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
616 VEC_safe_push (tree, heap, worklist, phi);
620 if (vect_print_dump_info (REPORT_DETAILS))
621 fprintf (vect_dump, "Detected induction.");
622 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
626 /* Second - identify all reductions. */
627 while (VEC_length (tree, worklist) > 0)
629 tree phi = VEC_pop (tree, worklist);
630 tree def = PHI_RESULT (phi);
631 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
634 if (vect_print_dump_info (REPORT_DETAILS))
636 fprintf (vect_dump, "Analyze phi: ");
637 print_generic_expr (vect_dump, phi, TDF_SLIM);
640 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
641 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
643 reduc_stmt = vect_is_simple_reduction (loop, phi);
646 if (vect_print_dump_info (REPORT_DETAILS))
647 fprintf (vect_dump, "Detected reduction.");
/* Mark both the phi and the reduction statement it feeds.  */
648 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
649 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
653 if (vect_print_dump_info (REPORT_DETAILS))
654 fprintf (vect_dump, "Unknown def-use cycle pattern.");
657 VEC_free (tree, heap, worklist);
/* NOTE(review): loop header/braces omitted by this listing; visible
   tokens kept verbatim.  The chain is a linked list of stmts ordered by
   ascending DR_INIT, threaded through DR_GROUP_NEXT_DR.  */
662 /* Function vect_insert_into_interleaving_chain.
664 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
667 vect_insert_into_interleaving_chain (struct data_reference *dra,
668 struct data_reference *drb)
670 tree prev, next, next_init;
671 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
672 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
674 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
675 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
/* Walk the chain until a node with a larger init is found, then splice
   DRA in before it.  */
678 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
679 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
682 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
683 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
687 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
690 /* We got to the end of the list. Insert here. */
691 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
692 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
/* NOTE(review): interior lines omitted by this listing (braces, returns,
   a while-loop header); visible tokens kept verbatim.  */
696 /* Function vect_update_interleaving_chain.
698 For two data-refs DRA and DRB that are a part of a chain interleaved data
699 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
701 There are four possible cases:
702 1. New stmts - both DRA and DRB are not a part of any chain:
705 2. DRB is a part of a chain and DRA is not:
706 no need to update FIRST_DR
707 no need to insert DRB
708 insert DRA according to init
709 3. DRA is a part of a chain and DRB is not:
710 if (init of FIRST_DR > init of DRB)
712 NEXT(FIRST_DR) = previous FIRST_DR
714 insert DRB according to its init
715 4. both DRA and DRB are in some interleaving chains:
716 choose the chain with the smallest init of FIRST_DR
717 insert the nodes of the second chain into the first one. */
720 vect_update_interleaving_chain (struct data_reference *drb,
721 struct data_reference *dra)
723 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
724 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
725 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
726 tree node, prev, next, node_init, first_stmt;
728 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
729 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
/* DRB (smaller init) becomes the chain head; DRA follows it.  */
731 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
732 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
733 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
737 /* 2. DRB is a part of a chain and DRA is not. */
738 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
740 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
741 /* Insert DRA into the chain of DRB. */
742 vect_insert_into_interleaving_chain (dra, drb);
746 /* 3. DRA is a part of a chain and DRB is not. */
747 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
749 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
750 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
754 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
756 /* DRB's init is smaller than the init of the stmt previously marked
757 as the first stmt of the interleaving chain of DRA. Therefore, we
758 update FIRST_STMT and put DRB in the head of the list. */
759 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
760 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
762 /* Update all the stmts in the list to point to the new FIRST_STMT. */
763 tmp = old_first_stmt;
766 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
767 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
772 /* Insert DRB in the list of DRA. */
773 vect_insert_into_interleaving_chain (drb, dra);
774 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
779 /* 4. both DRA and DRB are in some interleaving chains. */
780 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
781 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
/* Same chain already — nothing to merge.  */
782 if (first_a == first_b)
784 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
785 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
787 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
789 /* Insert the nodes of DRA chain into the DRB chain.
790 After inserting a node, continue from this node of the DRB chain (don't
791 start from the beginning. */
792 node = DR_GROUP_FIRST_DR (stmtinfo_a);
793 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
794 first_stmt = first_b;
798 /* Insert the nodes of DRB chain into the DRA chain.
799 After inserting a node, continue from this node of the DRA chain (don't
800 start from the beginning. */
801 node = DR_GROUP_FIRST_DR (stmtinfo_b);
802 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
803 first_stmt = first_a;
/* Merge loop: insert each node of the source chain into the target
   chain in init order (same splice logic as
   vect_insert_into_interleaving_chain).  */
808 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
809 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
812 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
813 if (tree_int_cst_compare (next_init, node_init) > 0)
816 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
817 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
822 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
826 /* We got to the end of the list. Insert here. */
827 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
828 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
831 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
832 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
/* NOTE(review): return-type line, braces and local declarations are
   omitted by this listing; visible tokens kept verbatim.  Structural
   equality by recursion over binary sub-expressions, after stripping
   no-op conversions.  */
837 /* Function vect_equal_offsets.
839 Check if OFFSET1 and OFFSET2 are identical expressions. */
842 vect_equal_offsets (tree offset1, tree offset2)
846 STRIP_NOPS (offset1);
847 STRIP_NOPS (offset2);
/* Pointer equality is sufficient (shared tree nodes).  */
849 if (offset1 == offset2)
/* Only structurally-identical binary expressions can match beyond
   pointer equality.  */
852 if (TREE_CODE (offset1) != TREE_CODE (offset2)
853 || !BINARY_CLASS_P (offset1)
854 || !BINARY_CLASS_P (offset2))
857 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
858 TREE_OPERAND (offset2, 0));
859 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
860 TREE_OPERAND (offset2, 1));
862 return (res0 && res1);
/* NOTE(review): interior lines omitted by this listing; visible tokens
   kept verbatim.  */
866 /* Function vect_check_interleaving.
868 Check if DRA and DRB are a part of interleaving. In case they are, insert
869 DRA and DRB in an interleaving chain. */
872 vect_check_interleaving (struct data_reference *dra,
873 struct data_reference *drb)
875 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
877 /* Check that the data-refs have same first location (except init) and they
878 are both either store or load (not load and store). */
879 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
880 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
881 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
882 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
883 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
884 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
885 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
886 || DR_IS_READ (dra) != DR_IS_READ (drb))
/* Conditions for interleaving (per the omitted comment header):  */
890 1. data-refs are of the same type
891 2. their steps are equal
892 3. the step is greater than the difference between data-refs' inits */
893 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
894 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
896 if (type_size_a != type_size_b
897 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
900 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
901 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
902 step = TREE_INT_CST_LOW (DR_STEP (dra));
906 /* If init_a == init_b + the size of the type * k, we have an interleaving,
907 and DRB is accessed before DRA. */
908 diff_mod_size = (init_a - init_b) % type_size_a;
/* Gap larger than the step cannot be part of the same interleaved group.  */
910 if ((init_a - init_b) > step)
913 if (diff_mod_size == 0)
915 vect_update_interleaving_chain (drb, dra);
916 if (vect_print_dump_info (REPORT_DR_DETAILS))
918 fprintf (vect_dump, "Detected interleaving ");
919 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
920 fprintf (vect_dump, " and ");
921 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
928 /* If init_b == init_a + the size of the type * k, we have an
929 interleaving, and DRA is accessed before DRB. */
930 diff_mod_size = (init_b - init_a) % type_size_a;
932 if ((init_b - init_a) > step)
935 if (diff_mod_size == 0)
937 vect_update_interleaving_chain (dra, drb);
938 if (vect_print_dump_info (REPORT_DR_DETAILS))
940 fprintf (vect_dump, "Detected interleaving ");
941 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
942 fprintf (vect_dump, " and ");
943 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* NOTE(review): interior lines omitted by this listing (braces, returns);
   visible tokens kept verbatim.  */
951 /* Function vect_analyze_data_ref_dependence.
953 Return TRUE if there (might) exist a dependence between a memory-reference
954 DRA and a memory-reference DRB. */
957 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
958 loop_vec_info loop_vinfo)
961 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
962 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
963 struct data_reference *dra = DDR_A (ddr);
964 struct data_reference *drb = DDR_B (ddr);
965 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
966 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
967 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
968 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
969 lambda_vector dist_v;
970 unsigned int loop_depth;
972 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
974 /* Independent data accesses. */
/* Even independent refs may be interleaved accesses of one group.  */
975 vect_check_interleaving (dra, drb);
/* Read-read pairs and self pairs carry no vectorization constraint.  */
979 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
982 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
984 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
987 "not vectorized: can't determine dependence between ");
988 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
989 fprintf (vect_dump, " and ");
990 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
995 if (DDR_NUM_DIST_VECTS (ddr) == 0)
997 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
999 fprintf (vect_dump, "not vectorized: bad dist vector for ");
1000 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1001 fprintf (vect_dump, " and ");
1002 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1007 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1008 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1010 int dist = dist_v[loop_depth];
1012 if (vect_print_dump_info (REPORT_DR_DETAILS))
1013 fprintf (vect_dump, "dependence distance = %d.", dist);
1015 /* Same loop iteration. */
1016 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1018 /* Two references with distance zero have the same alignment. */
1019 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1020 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1021 if (vect_print_dump_info (REPORT_ALIGNMENT))
1022 fprintf (vect_dump, "accesses have the same alignment.");
1023 if (vect_print_dump_info (REPORT_DR_DETAILS))
1025 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1026 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1027 fprintf (vect_dump, " and ");
1028 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1031 /* For interleaving, mark that there is a read-write dependency if
1032 necessary. We check before that one of the data-refs is store. */
1033 if (DR_IS_READ (dra))
1034 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1037 if (DR_IS_READ (drb))
1038 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
/* A distance of at least VF means the vectorized accesses never
   overlap within one vector iteration.  */
1044 if (abs (dist) >= vectorization_factor)
1046 /* Dependence distance does not create dependence, as far as vectorization
1047 is concerned, in this case. */
1048 if (vect_print_dump_info (REPORT_DR_DETAILS))
1049 fprintf (vect_dump, "dependence distance >= VF.");
1053 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1056 "not vectorized: possible dependence between data-refs ");
1057 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1058 fprintf (vect_dump, " and ");
1059 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* NOTE(review): braces/returns omitted by this listing; visible tokens
   kept verbatim.  Simple driver: checks every dependence relation in the
   loop with vect_analyze_data_ref_dependence.  */
1069 /* Function vect_analyze_data_ref_dependences.
1071 Examine all the data references in the loop, and make sure there do not
1072 exist any data dependences between them. */
1075 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1078 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1079 struct data_dependence_relation *ddr;
1081 if (vect_print_dump_info (REPORT_DETAILS))
1082 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1084 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1085 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
/* NOTE(review): interior lines omitted by this listing; visible tokens
   kept verbatim.  */
1092 /* Function vect_compute_data_ref_alignment
1094 Compute the misalignment of the data reference DR.
1097 1. If during the misalignment computation it is found that the data reference
1098 cannot be vectorized then false is returned.
1099 2. DR_MISALIGNMENT (DR) is defined.
1101 FOR NOW: No analysis is actually performed. Misalignment is calculated
1102 only for trivial cases. TODO. */
1105 vect_compute_data_ref_alignment (struct data_reference *dr)
1107 tree stmt = DR_STMT (dr);
1108 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1109 tree ref = DR_REF (dr);
1111 tree base, base_addr;
1114 tree aligned_to, alignment;
1116 if (vect_print_dump_info (REPORT_DETAILS))
1117 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1119 /* Initialize misalignment to unknown. */
1120 DR_MISALIGNMENT (dr) = -1;
1122 misalign = DR_OFFSET_MISALIGNMENT (dr);
1123 aligned_to = DR_ALIGNED_TO (dr);
1124 base_addr = DR_BASE_ADDRESS (dr);
1125 base = build_fold_indirect_ref (base_addr);
1126 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment in bytes: that of the vector type.  */
1127 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1129 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1132 if (vect_print_dump_info (REPORT_DETAILS))
1134 fprintf (vect_dump, "Unknown alignment for access: ");
1135 print_generic_expr (vect_dump, base, TDF_SLIM);
/* Decide whether the base object/pointer is already sufficiently
   aligned for the vector type.  */
1141 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1143 || (TREE_CODE (base_addr) == SSA_NAME
1144 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1145 TREE_TYPE (base_addr)))),
1147 base_aligned = true;
1149 base_aligned = false;
1153 /* Do not change the alignment of global variables if
1154 flag_section_anchors is enabled. */
1155 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1156 || (TREE_STATIC (base) && flag_section_anchors))
1158 if (vect_print_dump_info (REPORT_DETAILS))
1160 fprintf (vect_dump, "can't force alignment of ref: ");
1161 print_generic_expr (vect_dump, ref, TDF_SLIM);
1166 /* Force the alignment of the decl.
1167 NOTE: This is the only change to the code we make during
1168 the analysis phase, before deciding to vectorize the loop. */
1169 if (vect_print_dump_info (REPORT_DETAILS))
1170 fprintf (vect_dump, "force alignment");
1171 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1172 DECL_USER_ALIGN (base) = 1;
1175 /* At this point we assume that the base is aligned. */
1176 gcc_assert (base_aligned
1177 || (TREE_CODE (base) == VAR_DECL
1178 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1180 /* Modulo alignment. */
1181 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1183 if (!host_integerp (misalign, 1))
1185 /* Negative or overflowed misalignment value. */
1186 if (vect_print_dump_info (REPORT_DETAILS))
1187 fprintf (vect_dump, "unexpected misalign value");
1191 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1193 if (vect_print_dump_info (REPORT_DETAILS))
1195 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1196 print_generic_expr (vect_dump, ref, TDF_SLIM);
1203 /* Function vect_compute_data_refs_alignment
1205 Compute the misalignment of data references in the loop.
1206 Return FALSE if a data reference is found that cannot be vectorized. */
/* NOTE(review): the return type, braces, declaration of `i' and the return
   statements are elided from this excerpt.  */
1209 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1211 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1212 struct data_reference *dr;
/* Compute per-DR misalignment; a false result presumably aborts the
   analysis (failure path elided here).  */
1215 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1216 if (!vect_compute_data_ref_alignment (dr))
1223 /* Function vect_update_misalignment_for_peel
1225 DR - the data reference whose misalignment is to be adjusted.
1226 DR_PEEL - the data reference whose misalignment is being made
1227 zero in the vector loop by the peel.
1228 NPEEL - the number of iterations in the peel loop if the misalignment
1229 of DR_PEEL is known at compile time. */
/* NOTE(review): braces, the declaration of `i' and some early-return
   statements are elided from this excerpt.  */
1232 vect_update_misalignment_for_peel (struct data_reference *dr,
1233 struct data_reference *dr_peel, int npeel)
1236 VEC(dr_p,heap) *same_align_drs;
1237 struct data_reference *current_dr;
1238 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1239 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1240 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1241 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1243 /* For interleaved data accesses the step in the loop must be multiplied by
1244 the size of the interleaving group. */
1245 if (DR_GROUP_FIRST_DR (stmt_info))
1246 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1247 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1248 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
/* If DR has the same element-granular misalignment as DR_PEEL, the peel
   that aligns DR_PEEL aligns DR too.  */
1250 if (known_alignment_for_access_p (dr)
1251 && known_alignment_for_access_p (dr_peel)
1252 && (DR_MISALIGNMENT (dr) / dr_size ==
1253 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1255 DR_MISALIGNMENT (dr) = 0;
1259 /* It can be assumed that the data refs with the same alignment as dr_peel
1260 are aligned in the vector loop. */
1262 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1263 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1265 if (current_dr != dr)
1267 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1268 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1269 DR_MISALIGNMENT (dr) = 0;
/* Both misalignments known at compile time: advance DR's misalignment by
   NPEEL iterations' worth of bytes, modulo the vector size.  */
1273 if (known_alignment_for_access_p (dr)
1274 && known_alignment_for_access_p (dr_peel))
1276 DR_MISALIGNMENT (dr) += npeel * dr_size;
1277 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
/* Otherwise the misalignment after peeling cannot be determined.  */
1281 if (vect_print_dump_info (REPORT_DETAILS))
1282 fprintf (vect_dump, "Setting misalignment to -1.");
1283 DR_MISALIGNMENT (dr) = -1;
1287 /* Function vect_verify_datarefs_alignment
1289 Return TRUE if all data references in the loop can be
1290 handled with respect to alignment. */
/* NOTE(review): the return type, braces, declaration of `i' and the return
   statements are elided from this excerpt.  */
1293 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1295 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1296 struct data_reference *dr;
1297 enum dr_alignment_support supportable_dr_alignment;
1300 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1302 tree stmt = DR_STMT (dr);
1303 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1305 /* For interleaving, only the alignment of the first access matters. */
1306 if (DR_GROUP_FIRST_DR (stmt_info)
1307 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Ask the target whether this (possibly misaligned) access can be
   handled; zero means unsupported.  */
1310 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1311 if (!supportable_dr_alignment)
1313 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1315 if (DR_IS_READ (dr))
1317 "not vectorized: unsupported unaligned load.");
1320 "not vectorized: unsupported unaligned store.");
1324 if (supportable_dr_alignment != dr_aligned
1325 && vect_print_dump_info (REPORT_ALIGNMENT))
1326 fprintf (vect_dump, "Vectorizing an unaligned access.");
1332 /* Function vect_enhance_data_refs_alignment
1334 This pass will use loop versioning and loop peeling in order to enhance
1335 the alignment of data references in the loop.
1337 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1338 original loop is to be vectorized; Any other loops that are created by
1339 the transformations performed in this pass - are not supposed to be
1340 vectorized. This restriction will be relaxed.
1342 This pass will require a cost model to guide it whether to apply peeling
1343 or versioning or a combination of the two. For example, the scheme that
1344 intel uses when given a loop with several memory accesses, is as follows:
1345 choose one memory access ('p') which alignment you want to force by doing
1346 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1347 other accesses are not necessarily aligned, or (2) use loop versioning to
1348 generate one loop in which all accesses are aligned, and another loop in
1349 which only 'p' is necessarily aligned.
1351 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1352 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1353 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1355 Devising a cost model is the most critical aspect of this work. It will
1356 guide us on which access to peel for, whether to use loop versioning, how
1357 many versions to create, etc. The cost model will probably consist of
1358 generic considerations as well as target specific considerations (on
1359 powerpc for example, misaligned stores are more painful than misaligned
1362 Here are the general steps involved in alignment enhancements:
1364 -- original loop, before alignment analysis:
1365 for (i=0; i<N; i++){
1366 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1367 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1370 -- After vect_compute_data_refs_alignment:
1371 for (i=0; i<N; i++){
1372 x = q[i]; # DR_MISALIGNMENT(q) = 3
1373 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1376 -- Possibility 1: we do loop versioning:
1378 for (i=0; i<N; i++){ # loop 1A
1379 x = q[i]; # DR_MISALIGNMENT(q) = 3
1380 p[i] = y; # DR_MISALIGNMENT(p) = 0
1384 for (i=0; i<N; i++){ # loop 1B
1385 x = q[i]; # DR_MISALIGNMENT(q) = 3
1386 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1390 -- Possibility 2: we do loop peeling:
1391 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1395 for (i = 3; i < N; i++){ # loop 2A
1396 x = q[i]; # DR_MISALIGNMENT(q) = 0
1397 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1400 -- Possibility 3: combination of loop peeling and versioning:
1401 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1406 for (i = 3; i<N; i++){ # loop 3A
1407 x = q[i]; # DR_MISALIGNMENT(q) = 0
1408 p[i] = y; # DR_MISALIGNMENT(p) = 0
1412 for (i = 3; i<N; i++){ # loop 3B
1413 x = q[i]; # DR_MISALIGNMENT(q) = 0
1414 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1418 These loops are later passed to loop_transform to be vectorized. The
1419 vectorizer will use the alignment information to guide the transformation
1420 (whether to generate regular loads/stores, or with special handling for
/* NOTE(review): this excerpt elides many source lines (braces, several
   declarations such as `i', `mis', `npeel', `stat', `mask', `vectype' and
   `phi', plus a number of statements and return paths), so the annotations
   below cover only the visible statements.  */
1424 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1426 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1427 enum dr_alignment_support supportable_dr_alignment;
1428 struct data_reference *dr0 = NULL;
1429 struct data_reference *dr;
1431 bool do_peeling = false;
1432 bool do_versioning = false;
1435 stmt_vec_info stmt_info;
1437 if (vect_print_dump_info (REPORT_DETAILS))
1438 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1440 /* While cost model enhancements are expected in the future, the high level
1441 view of the code at this time is as follows:
1443 A) If there is a misaligned write then see if peeling to align this write
1444 can make all data references satisfy vect_supportable_dr_alignment.
1445 If so, update data structures as needed and return true. Note that
1446 at this time vect_supportable_dr_alignment is known to return false
1447 for a misaligned write.
1449 B) If peeling wasn't possible and there is a data reference with an
1450 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1451 then see if loop versioning checks can be used to make all data
1452 references satisfy vect_supportable_dr_alignment. If so, update
1453 data structures as needed and return true.
1455 C) If neither peeling nor versioning were successful then return false if
1456 any data reference does not satisfy vect_supportable_dr_alignment.
1458 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1460 Note, Possibility 3 above (which is peeling and versioning together) is not
1461 being done at this time. */
1463 /* (1) Peeling to force alignment. */
1465 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1467 + How many accesses will become aligned due to the peeling
1468 - How many accesses will become unaligned due to the peeling,
1469 and the cost of misaligned accesses.
1470 - The cost of peeling (the extra runtime checks, the increase
1473 The scheme we use FORNOW: peel to force the alignment of the first
1474 misaligned store in the loop.
1475 Rationale: misaligned stores are not yet supported.
1477 TODO: Use a cost model. */
1479 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1481 stmt = DR_STMT (dr);
1482 stmt_info = vinfo_for_stmt (stmt);
1484 /* For interleaving, only the alignment of the first access
1486 if (DR_GROUP_FIRST_DR (stmt_info)
1487 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* A misaligned store is the peeling candidate (see FORNOW above).  */
1490 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1492 if (DR_GROUP_FIRST_DR (stmt_info))
1494 /* For interleaved access we peel only if number of iterations in
1495 the prolog loop ({VF - misalignment}), is a multiple of the
1496 number of the interleaved accesses. */
1497 int elem_size, mis_in_elements;
1498 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1500 /* FORNOW: handle only known alignment. */
1501 if (!known_alignment_for_access_p (dr))
/* Element size assumes a full SIMD word divided by the vectorization
   factor; misalignment is then converted to element units.  */
1507 elem_size = UNITS_PER_SIMD_WORD / vf;
1508 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1510 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1522 /* Often peeling for alignment will require peeling for loop-bound, which in
1523 turn requires that we know how to adjust the loop ivs after the loop. */
1524 if (!vect_can_advance_ivs_p (loop_vinfo))
1532 if (known_alignment_for_access_p (dr0))
1534 /* Since it's known at compile time, compute the number of iterations
1535 in the peeled loop (the peeling factor) for use in updating
1536 DR_MISALIGNMENT values. The peeling factor is the vectorization
1537 factor minus the misalignment as an element count. */
1538 mis = DR_MISALIGNMENT (dr0);
1539 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1540 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1542 /* For interleaved data access every iteration accesses all the
1543 members of the group, therefore we divide the number of iterations
1544 by the group size. */
1545 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1546 if (DR_GROUP_FIRST_DR (stmt_info))
1547 npeel /= DR_GROUP_SIZE (stmt_info);
1549 if (vect_print_dump_info (REPORT_DETAILS))
1550 fprintf (vect_dump, "Try peeling by %d", npeel);
1553 /* Ensure that all data refs can be vectorized after the peel. */
1554 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1556 int save_misalignment;
1561 stmt = DR_STMT (dr);
1562 stmt_info = vinfo_for_stmt (stmt);
1563 /* For interleaving, only the alignment of the first access
1565 if (DR_GROUP_FIRST_DR (stmt_info)
1566 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Trial update: apply the peel's effect on this DR's misalignment,
   query supportability, then restore the saved value.  */
1569 save_misalignment = DR_MISALIGNMENT (dr);
1570 vect_update_misalignment_for_peel (dr, dr0, npeel);
1571 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1572 DR_MISALIGNMENT (dr) = save_misalignment;
1574 if (!supportable_dr_alignment)
1583 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1584 If the misalignment of DR_i is identical to that of dr0 then set
1585 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1586 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1587 by the peeling factor times the element size of DR_i (MOD the
1588 vectorization factor times the size). Otherwise, the
1589 misalignment of DR_i must be set to unknown. */
1590 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1592 vect_update_misalignment_for_peel (dr, dr0, npeel);
1594 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1595 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1596 DR_MISALIGNMENT (dr0) = 0;
1597 if (vect_print_dump_info (REPORT_ALIGNMENT))
1598 fprintf (vect_dump, "Alignment of access forced using peeling.");
1600 if (vect_print_dump_info (REPORT_DETAILS))
1601 fprintf (vect_dump, "Peeling for alignment will be applied.");
1603 stat = vect_verify_datarefs_alignment (loop_vinfo);
1610 /* (2) Versioning to force alignment. */
1612 /* Try versioning if:
1613 1) flag_tree_vect_loop_version is TRUE
1614 2) optimize_size is FALSE
1615 3) there is at least one unsupported misaligned data ref with an unknown
1617 4) all misaligned data refs with a known misalignment are supported, and
1618 5) the number of runtime alignment checks is within reason. */
1620 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1624 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1626 stmt = DR_STMT (dr);
1627 stmt_info = vinfo_for_stmt (stmt);
1629 /* For interleaving, only the alignment of the first access
1631 if (aligned_access_p (dr)
1632 || (DR_GROUP_FIRST_DR (stmt_info)
1633 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1636 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1638 if (!supportable_dr_alignment)
/* Versioning only helps unknown misalignments, and only while the
   number of runtime checks stays within the parameter bound.  */
1644 if (known_alignment_for_access_p (dr)
1645 || VEC_length (tree,
1646 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1647 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1649 do_versioning = false;
1653 stmt = DR_STMT (dr);
1654 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1655 gcc_assert (vectype);
1657 /* The rightmost bits of an aligned address must be zeros.
1658 Construct the mask needed for this test. For example,
1659 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1660 mask must be 15 = 0xf. */
1661 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1663 /* FORNOW: use the same mask to test all potentially unaligned
1664 references in the loop. The vectorizer currently supports
1665 a single vector size, see the reference to
1666 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1667 vectorization factor is computed. */
1668 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1669 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1670 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1671 VEC_safe_push (tree, heap,
1672 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1677 /* Versioning requires at least one misaligned data reference. */
1678 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1679 do_versioning = false;
1680 else if (!do_versioning)
1681 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1686 VEC(tree,heap) *may_misalign_stmts
1687 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1690 /* It can now be assumed that the data references in the statements
1691 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1692 of the loop being vectorized. */
1693 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1695 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1696 dr = STMT_VINFO_DATA_REF (stmt_info);
1697 DR_MISALIGNMENT (dr) = 0;
1698 if (vect_print_dump_info (REPORT_ALIGNMENT))
1699 fprintf (vect_dump, "Alignment of access forced using versioning.");
1702 if (vect_print_dump_info (REPORT_DETAILS))
1703 fprintf (vect_dump, "Versioning for alignment will be applied.");
1705 /* Peeling and versioning can't be done together at this time. */
1706 gcc_assert (! (do_peeling && do_versioning));
1708 stat = vect_verify_datarefs_alignment (loop_vinfo);
1713 /* This point is reached if neither peeling nor versioning is being done. */
1714 gcc_assert (! (do_peeling || do_versioning));
1716 stat = vect_verify_datarefs_alignment (loop_vinfo);
1721 /* Function vect_analyze_data_refs_alignment
1723 Analyze the alignment of the data-references in the loop.
1724 Return FALSE if a data reference is found that cannot be vectorized. */
/* NOTE(review): the return type, braces, return statements, and the call to
   vect_enhance_data_refs_alignment (if any) are elided from this excerpt. */
1727 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1729 if (vect_print_dump_info (REPORT_DETAILS))
1730 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1732 if (!vect_compute_data_refs_alignment (loop_vinfo))
1734 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1736 "not vectorized: can't calculate alignment for data ref.");
1744 /* Function vect_analyze_data_ref_access.
1746 Analyze the access pattern of the data-reference DR. For now, a data access
1747 has to be consecutive to be considered vectorizable. */
/* NOTE(review): this excerpt elides many source lines (braces, conditions,
   some declarations such as `next_step', `prev' and `count' updates, and the
   return statements), so the annotations below cover only visible code.  */
1750 vect_analyze_data_ref_access (struct data_reference *dr)
1752 tree step = DR_STEP (dr);
1753 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1754 tree scalar_type = TREE_TYPE (DR_REF (dr));
1755 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1756 tree stmt = DR_STMT (dr);
1757 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1758 interleaving group (including gaps). */
1759 HOST_WIDE_INT stride = dr_step / type_size;
1763 if (vect_print_dump_info (REPORT_DETAILS))
1764 fprintf (vect_dump, "bad data-ref access");
/* Consecutive access: the step equals the scalar element size
   (tree_int_cst_compare returns 0 on equality).  */
1769 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1771 /* Mark that it is not interleaving. */
1772 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1776 /* Not consecutive access is possible only if it is a part of interleaving. */
1777 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1779 /* Check if this DR is a part of interleaving, and is a single
1780 element of the group that is accessed in the loop. */
1782 /* Gaps are supported only for loads. STEP must be a multiple of the type
1783 size. The size of the group must be a power of 2. */
1785 && (dr_step % type_size) == 0
1787 && exact_log2 (stride) != -1)
1789 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1790 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1791 if (vect_print_dump_info (REPORT_DR_DETAILS))
1793 fprintf (vect_dump, "Detected single element interleaving %d ",
1794 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1795 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1796 fprintf (vect_dump, " step ");
1797 print_generic_expr (vect_dump, step, TDF_SLIM);
1801 if (vect_print_dump_info (REPORT_DETAILS))
1802 fprintf (vect_dump, "not consecutive access");
/* DR is the first statement of an interleaving chain: validate the whole
   chain member by member.  */
1806 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1808 /* First stmt in the interleaving chain. Check the chain. */
1809 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1810 struct data_reference *data_ref = dr;
1811 unsigned int count = 1;
1813 tree prev_init = DR_INIT (data_ref);
1815 HOST_WIDE_INT diff, count_in_bytes;
1819 /* Skip same data-refs. In case that two or more stmts share data-ref
1820 (supported only for loads), we vectorize only the first stmt, and
1821 the rest get their vectorized loads from the first one. */
1822 if (!tree_int_cst_compare (DR_INIT (data_ref),
1823 DR_INIT (STMT_VINFO_DATA_REF (
1824 vinfo_for_stmt (next)))))
1826 if (!DR_IS_READ (data_ref))
1828 if (vect_print_dump_info (REPORT_DETAILS))
1829 fprintf (vect_dump, "Two store stmts share the same dr.");
1833 /* Check that there is no load-store dependencies for this loads
1834 to prevent a case of load-store-load to the same location. */
1835 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1836 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1838 if (vect_print_dump_info (REPORT_DETAILS))
1840 "READ_WRITE dependence in interleaving.");
1844 /* For load use the same data-ref load. */
1845 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1848 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1853 /* Check that all the accesses have the same STEP. */
1854 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1855 if (tree_int_cst_compare (step, next_step))
1857 if (vect_print_dump_info (REPORT_DETAILS))
1858 fprintf (vect_dump, "not consecutive access in interleaving");
1862 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1863 /* Check that the distance between two accesses is equal to the type
1864 size. Otherwise, we have gaps. */
1865 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1866 - TREE_INT_CST_LOW (prev_init)) / type_size;
1867 if (!DR_IS_READ (data_ref) && diff != 1)
1869 if (vect_print_dump_info (REPORT_DETAILS))
1870 fprintf (vect_dump, "interleaved store with gaps");
1873 /* Store the gap from the previous member of the group. If there is no
1874 gap in the access, DR_GROUP_GAP is always 1. */
1875 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1877 prev_init = DR_INIT (data_ref);
1878 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1879 /* Count the number of data-refs in the chain. */
1883 /* COUNT is the number of accesses found, we multiply it by the size of
1884 the type to get COUNT_IN_BYTES. */
1885 count_in_bytes = type_size * count;
1887 /* Check that the size of the interleaving is not greater than STEP. */
1888 if (dr_step < count_in_bytes)
1890 if (vect_print_dump_info (REPORT_DETAILS))
1892 fprintf (vect_dump, "interleaving size is greater than step for ");
1893 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1898 /* Check that the size of the interleaving is equal to STEP for stores,
1899 i.e., that there are no gaps. */
1900 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1902 if (vect_print_dump_info (REPORT_DETAILS))
1903 fprintf (vect_dump, "interleaved store with gaps");
1907 /* Check that STEP is a multiple of type size. */
1908 if ((dr_step % type_size) != 0)
1910 if (vect_print_dump_info (REPORT_DETAILS))
1912 fprintf (vect_dump, "step is not a multiple of type size: step ");
1913 print_generic_expr (vect_dump, step, TDF_SLIM);
1914 fprintf (vect_dump, " size ");
1915 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1921 /* FORNOW: we handle only interleaving that is a power of 2. */
1922 if (exact_log2 (stride) == -1)
1924 if (vect_print_dump_info (REPORT_DETAILS))
1925 fprintf (vect_dump, "interleaving is not a power of 2");
1928 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1934 /* Function vect_analyze_data_ref_accesses.
1936 Analyze the access pattern of all the data references in the loop.
1938 FORNOW: the only access pattern that is considered vectorizable is a
1939 simple step 1 (consecutive) access.
1941 FORNOW: handle only arrays and pointer accesses. */
/* NOTE(review): the return type, braces, declaration of `i' and the return
   statements are elided from this excerpt.  */
1944 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1947 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1948 struct data_reference *dr;
1950 if (vect_print_dump_info (REPORT_DETAILS))
1951 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1953 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1954 if (!vect_analyze_data_ref_access (dr))
1956 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1957 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1965 /* Function vect_analyze_data_refs.
1967 Find all the data references in the loop.
1969 The general structure of the analysis of data refs in the vectorizer is as
1971 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1972 find and analyze all data-refs in the loop and their dependences.
1973 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1974 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1975 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
/* NOTE(review): braces, the declarations of `i' and `scalar_type', and the
   return statements are elided from this excerpt.  */
1980 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1982 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1984 VEC (data_reference_p, heap) *datarefs;
1985 struct data_reference *dr;
1988 if (vect_print_dump_info (REPORT_DETAILS))
1989 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
/* Populate LOOP_VINFO's data-reference and dependence-relation vectors.  */
1991 compute_data_dependences_for_loop (loop, true,
1992 &LOOP_VINFO_DATAREFS (loop_vinfo),
1993 &LOOP_VINFO_DDRS (loop_vinfo));
1995 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1996 from stmt_vec_info struct to DR and vectype. */
1997 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1999 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2002 stmt_vec_info stmt_info;
2004 if (!dr || !DR_REF (dr))
2006 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2007 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2011 /* Update DR field in stmt_vec_info struct. */
2012 stmt = DR_STMT (dr);
2013 stmt_info = vinfo_for_stmt (stmt);
/* FORNOW the vectorizer supports at most one data-ref per statement.  */
2015 if (STMT_VINFO_DATA_REF (stmt_info))
2017 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2020 "not vectorized: more than one data ref in stmt: ");
2021 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2025 STMT_VINFO_DATA_REF (stmt_info) = dr;
2027 /* Check that analysis of the data-ref succeeded. */
2028 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2031 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2033 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2034 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2038 if (!DR_MEMTAG (dr))
2040 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2042 fprintf (vect_dump, "not vectorized: no memory tag for ");
2043 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2048 /* Set vectype for STMT. */
2049 scalar_type = TREE_TYPE (DR_REF (dr));
2050 STMT_VINFO_VECTYPE (stmt_info) =
2051 get_vectype_for_scalar_type (scalar_type);
2052 if (!STMT_VINFO_VECTYPE (stmt_info))
2054 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2057 "not vectorized: no vectype for stmt: ");
2058 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2059 fprintf (vect_dump, " scalar_type: ");
2060 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2070 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2072 /* Function vect_mark_relevant.
2074 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* NOTE(review): braces, the declaration of `pattern_stmt' and some return
   statements are elided from this excerpt.  */
2077 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2078 enum vect_relevant relevant, bool live_p)
2080 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2081 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2082 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2084 if (vect_print_dump_info (REPORT_DETAILS))
2085 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2087 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2091 /* This is the last stmt in a sequence that was detected as a
2092 pattern that can potentially be vectorized. Don't mark the stmt
2093 as relevant/live because it's not going to be vectorized.
2094 Instead mark the pattern-stmt that replaces it. */
2095 if (vect_print_dump_info (REPORT_DETAILS))
2096 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2097 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2098 stmt_info = vinfo_for_stmt (pattern_stmt);
2099 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2100 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2101 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2102 stmt = pattern_stmt;
/* Relevance only ever increases (enum values are ordered).  */
2105 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2106 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2107 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2109 if (TREE_CODE (stmt) == PHI_NODE)
2110 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2111 or live will fail vectorization later on. */
2114 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2115 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2117 if (vect_print_dump_info (REPORT_DETAILS))
2118 fprintf (vect_dump, "already marked relevant/live.");
2122 VEC_safe_push (tree, heap, *worklist, stmt);
2126 /* Function vect_stmt_relevant_p.
2128 Return true if STMT in loop that is represented by LOOP_VINFO is
2129 "relevant for vectorization".
2131 A stmt is considered "relevant for vectorization" if:
2132 - it has uses outside the loop.
2133 - it has vdefs (it alters memory).
2134 - control stmts in the loop (except for the exit condition).
2136 CHECKME: what other side effects would the vectorizer allow? */
/* NOTE(review): braces, the initialization of *live_p and the statements
   that set it are elided from this excerpt.  */
2139 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2140 enum vect_relevant *relevant, bool *live_p)
2142 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2143 ssa_op_iter op_iter;
2144 imm_use_iterator imm_iter;
2145 use_operand_p use_p;
2146 def_operand_p def_p;
2148 *relevant = vect_unused_in_loop;
2151 /* cond stmt other than loop exit cond. */
2152 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2153 *relevant = vect_used_in_loop;
2155 /* changing memory. */
2156 if (TREE_CODE (stmt) != PHI_NODE)
2157 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2159 if (vect_print_dump_info (REPORT_DETAILS))
2160 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2161 *relevant = vect_used_in_loop;
2164 /* uses outside the loop. */
2165 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2167 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2169 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2170 if (!flow_bb_inside_loop_p (loop, bb))
2172 if (vect_print_dump_info (REPORT_DETAILS))
2173 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2175 /* We expect all such uses to be in the loop exit phis
2176 (because of loop closed form) */
2177 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2178 gcc_assert (bb == single_exit (loop)->dest);
/* True when the stmt is live (used after the loop) or relevant inside it. */
2185 return (*live_p || *relevant);
2189 /* Function vect_mark_stmts_to_be_vectorized.
2191 Not all stmts in the loop need to be vectorized. For example:
2200 Stmt 1 and 3 do not need to be vectorized, because loop control and
2201 addressing of vectorized data-refs are handled differently.
2203 This pass detects such stmts. */
/* Worklist-based backward propagation: seed the worklist with the stmts/phis
   that are directly relevant or live (per vect_stmt_relevant_p), then walk
   the worklist, propagating relevance/liveness from each stmt to the
   defining stmts of its SSA uses.  Returns false (after freeing the
   worklist) on an unsupported use; presumably returns true on fall-through.
   NOTE(review): the visible text appears elided (several braces and return
   statements from the original are not shown); comments below describe only
   the logic that is visible here.  */
2206 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2208 VEC(tree,heap) *worklist;
2209 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2210 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2211 unsigned int nbbs = loop->num_nodes;
2212 block_stmt_iterator si;
2217 stmt_vec_info stmt_vinfo;
2221 enum vect_relevant relevant;
2223 enum vect_def_type dt;
2225 if (vect_print_dump_info (REPORT_DETAILS))
2226 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
/* Initial capacity 64 is a heuristic; VEC_alloc grows on demand.  */
2228 worklist = VEC_alloc (tree, heap, 64);
2230 /* 1. Init worklist. */
/* Seed with relevant/live header phis...
   NOTE(review): the loop over the relevant bb(s) enclosing this phi walk
   is elided from the visible text.  */
2233 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2235 if (vect_print_dump_info (REPORT_DETAILS))
2237 fprintf (vect_dump, "init: phi relevant? ");
2238 print_generic_expr (vect_dump, phi, TDF_SLIM);
2241 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2242 vect_mark_relevant (&worklist, phi, relevant, live_p);
/* ... and with relevant/live stmts from every basic block of the loop.  */
2245 for (i = 0; i < nbbs; i++)
2248 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2250 stmt = bsi_stmt (si);
2252 if (vect_print_dump_info (REPORT_DETAILS))
2254 fprintf (vect_dump, "init: stmt relevant? ");
2255 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2258 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2259 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2264 /* 2. Process_worklist */
/* Fixed-point iteration: vect_mark_relevant only re-queues a stmt whose
   relevance/liveness actually changed (presumably), so this terminates.  */
2266 while (VEC_length (tree, worklist) > 0)
2268 stmt = VEC_pop (tree, worklist);
2270 if (vect_print_dump_info (REPORT_DETAILS))
2272 fprintf (vect_dump, "worklist: examine stmt: ");
2273 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2276 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2277 in the loop, mark the stmt that defines it (DEF_STMT) as
2278 relevant/irrelevant and live/dead according to the liveness and
2279 relevance properties of STMT.
/* Phis are never pushed on the worklist (only their defs' stmts are),
   hence the assertion below.  */
2282 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2284 ann = stmt_ann (stmt);
2285 stmt_vinfo = vinfo_for_stmt (stmt);
2287 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2288 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2290 /* Generally, the liveness and relevance properties of STMT are
2291 propagated to the DEF_STMTs of its USEs:
2292 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2293 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2298 If USE is used only for address computations (e.g. array indexing),
2299 which does not need to be directly vectorized, then the
2300 liveness/relevance of the respective DEF_STMT is left unchanged.
2303 If STMT has been identified as defining a reduction variable, then
2304 we want to set liveness/relevance as follows:
2305 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2306 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2307 because even though STMT is classified as live (since it defines a
2308 value that is used across loop iterations) and irrelevant (since it
2309 is not used inside the loop), it will be vectorized, and therefore
2310 the corresponding DEF_STMTs need to marked as relevant.
2311 We distinguish between two kinds of relevant stmts - those that are
2312 used by a reduction computation, and those that are (also) used by
2313 a regular computation. This allows us later on to identify stmts
2314 that are used solely by a reduction, and therefore the order of
2315 the results that they produce does not have to be kept.
2319 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
/* A reduction def must be unused-in-loop and live, per the comment
   above; downgrade its propagated relevance to used-by-reduction.  */
2321 gcc_assert (relevant == vect_unused_in_loop && live_p);
2322 relevant = vect_used_by_reduction;
2327 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2329 if (vect_print_dump_info (REPORT_DETAILS))
2331 fprintf (vect_dump, "worklist: examine use %d: ", i++)
2332 print_generic_expr (vect_dump, use, TDF_SLIM);
2335 /* case 1: we are only interested in uses that need to be vectorized.
2336 Uses that are used for address computation are not considered
2339 if (!exist_non_indexing_operands_for_use_p (use, stmt))
/* Unsupported use: fail the whole analysis (worklist is freed;
   the "return false" itself is elided from the visible text).  */
2342 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2344 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2345 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2346 VEC_free (tree, heap, worklist);
/* Defs without a defining stmt (e.g. invariants / default defs) need no
   propagation; likewise defs outside the loop are skipped below.  */
2350 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2353 bb = bb_for_stmt (def_stmt);
2354 if (!flow_bb_inside_loop_p (loop, bb))
2356 vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
2358 } /* while worklist */
2360 VEC_free (tree, heap, worklist);
2365 /* Function vect_can_advance_ivs_p
2367 In case the number of iterations that LOOP iterates is unknown at compile
2368 time, an epilog loop will be generated, and the loop induction variables
2369 (IVs) will be "advanced" to the value they are supposed to take just before
2370 the epilog loop. Here we check that the access function of the loop IVs
2371 and the expression that represents the loop bound are simple enough.
2372 These restrictions will be relaxed in the future. */
/* Returns false if some loop-header phi is an IV whose access function
   cannot be analyzed or whose evolution step is itself a chrec (degree
   >= 2); virtual and reduction phis are exempt.  Presumably returns true
   when every phi passes.  NOTE(review): several guards, `continue`s and
   the final return are elided from the visible text; comments describe
   only the visible logic.  */
2375 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2377 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2378 basic_block bb = loop->header;
2381 /* Analyze phi functions of the loop header. */
2383 if (vect_print_dump_info (REPORT_DETAILS))
2384 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2386 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2388 tree access_fn = NULL;
2389 tree evolution_part;
2391 if (vect_print_dump_info (REPORT_DETAILS))
2393 fprintf (vect_dump, "Analyze phi: ");
2394 print_generic_expr (vect_dump, phi, TDF_SLIM);
2397 /* Skip virtual phi's. The data dependences that are associated with
2398 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2400 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2402 if (vect_print_dump_info (REPORT_DETAILS))
2403 fprintf (vect_dump, "virtual phi. skip.");
2407 /* Skip reduction phis. */
/* Reductions are "advanced" by the reduction-epilogue code, not here.  */
2409 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2411 if (vect_print_dump_info (REPORT_DETAILS))
2412 fprintf (vect_dump, "reduc phi. skip.");
2416 /* Analyze the evolution function. */
/* SCEV analysis of the phi result, with outer parameters instantiated
   so the expression is usable outside the analysis context.  */
2418 access_fn = instantiate_parameters
2419 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
/* NOTE(review): the guard for this failure path (presumably
   `if (!access_fn)`) is elided from the visible text.  */
2423 if (vect_print_dump_info (REPORT_DETAILS))
2424 fprintf (vect_dump, "No Access function.");
2428 if (vect_print_dump_info (REPORT_DETAILS))
2430 fprintf (vect_dump, "Access function of PHI: ");
2431 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* Extract the per-iteration step of the IV in this loop.  */
2434 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2436 if (evolution_part == NULL_TREE)
2438 if (vect_print_dump_info (REPORT_DETAILS))
2439 fprintf (vect_dump, "No evolution.");
2443 /* FORNOW: We do not transform initial conditions of IVs
2444 which evolution functions are a polynomial of degree >= 2. */
2446 if (tree_is_chrec (evolution_part))
2454 /* Function vect_get_loop_niters.
2456 Determine how many iterations the loop is executed.
2457 If an expression that represents the number of iterations
2458 can be constructed, place it in NUMBER_OF_ITERATIONS.
2459 Return the loop exit condition. */
/* Computes the loop's iteration count via scalar-evolution analysis
   (number_of_exit_cond_executions).  *NUMBER_OF_ITERATIONS is written
   only when the count is known (non-NULL and not chrec_dont_know);
   otherwise it is left untouched for the caller to detect.  Always
   returns the loop exit condition stmt.  NOTE(review): some braces are
   elided from the visible text.  */
2462 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2466 if (vect_print_dump_info (REPORT_DETAILS))
2467 fprintf (vect_dump, "=== get_loop_niters ===");
2469 niters = number_of_exit_cond_executions (loop);
2471 if (niters != NULL_TREE
2472 && niters != chrec_dont_know)
2474 *number_of_iterations = niters;
2476 if (vect_print_dump_info (REPORT_DETAILS))
2478 fprintf (vect_dump, "==> get_loop_niters:" );
2479 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2483 return get_loop_exit_condition (loop);
2487 /* Function vect_analyze_loop_form.
2489 Verify the following restrictions (some may be relaxed in the future):
2490 - it's an inner-most loop
2491 - number of BBs = 2 (which are the loop header and the latch)
2492 - the loop has a pre-header
2493 - the loop has a single entry and exit
2494 - the loop exit condition is simple enough, and the number of iterations
2495 can be analyzed (a countable loop). */
/* Checks the structural preconditions listed in the comment above; on
   success allocates a loop_vec_info, records the iteration count and exit
   condition in it, and (per the elided tail, presumably) returns it.
   Failure paths return NULL (the returns themselves are elided from the
   visible text).  NOTE(review): several guards and returns are elided;
   comments describe only the visible logic.  */
2497 static loop_vec_info
2498 vect_analyze_loop_form (struct loop *loop)
2500 loop_vec_info loop_vinfo;
2502 tree number_of_iterations = NULL;
2504 if (vect_print_dump_info (REPORT_DETAILS))
2505 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
/* NOTE(review): the guard for this nested-loop rejection (presumably
   `if (loop->inner)`) is elided from the visible text.  */
2509 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2510 fprintf (vect_dump, "not vectorized: nested loop.");
/* Shape check: single exit, exactly 2 BBs (header + latch), and exactly
   2 header predecessors (the preheader entry and the latch back-edge).  */
2514 if (!single_exit (loop)
2515 || loop->num_nodes != 2
2516 || EDGE_COUNT (loop->header->preds) != 2)
2518 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
/* Re-test each condition only to pick the most precise dump message.  */
2520 if (!single_exit (loop))
2521 fprintf (vect_dump, "not vectorized: multiple exits.");
2522 else if (loop->num_nodes != 2)
2523 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2524 else if (EDGE_COUNT (loop->header->preds) != 2)
2525 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2531 /* We assume that the loop exit condition is at the end of the loop. i.e.,
2532 that the loop is represented as a do-while (with a proper if-guard
2533 before the loop if needed), where the loop header contains all the
2534 executable statements, and the latch is empty. */
2535 if (!empty_block_p (loop->latch)
2536 || phi_nodes (loop->latch))
2538 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2539 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2543 /* Make sure there exists a single-predecessor exit bb: */
/* A shared exit bb is fixed up by splitting the exit edge; only an
   abnormal exit edge (which cannot be split) is a hard failure.  */
2544 if (!single_pred_p (single_exit (loop)->dest))
2546 edge e = single_exit (loop);
2547 if (!(e->flags & EDGE_ABNORMAL))
2549 split_loop_exit_edge (e);
2550 if (vect_print_dump_info (REPORT_DETAILS))
2551 fprintf (vect_dump, "split exit edge.");
2555 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2556 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2561 if (empty_block_p (loop->header))
2563 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2564 fprintf (vect_dump, "not vectorized: empty loop.");
/* Countability: obtain the exit condition and iteration-count expression.
   NOTE(review): the guard for the "complicated exit condition" path
   (presumably `if (!loop_cond)`) is elided from the visible text.  */
2568 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2571 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2572 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2576 if (!number_of_iterations)
2578 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2580 "not vectorized: number of iterations cannot be computed.");
2584 if (chrec_contains_undetermined (number_of_iterations))
2586 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2587 fprintf (vect_dump, "Infinite number of iterations.");
/* All structural checks passed: build and populate the loop_vec_info.  */
2591 loop_vinfo = new_loop_vec_info (loop);
2592 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2594 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2596 if (vect_print_dump_info (REPORT_DETAILS))
2598 fprintf (vect_dump, "Symbolic number of iterations is ");
2599 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
/* A known-zero trip count means there is nothing to vectorize.
   NOTE(review): this branch presumably runs only when niters is a known
   constant (the `else if` structure is elided from the visible text).  */
2603 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2605 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2606 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2610 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2616 /* Function vect_analyze_loop.
2618 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2619 for it. The different analyses will record information in the
2620 loop_vec_info struct. */
2622 vect_analyze_loop (struct loop *loop)
2625 loop_vec_info loop_vinfo;
2627 if (vect_print_dump_info (REPORT_DETAILS))
2628 fprintf (vect_dump, "===== analyze_loop_nest =====");
2630 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2632 loop_vinfo = vect_analyze_loop_form (loop);
2635 if (vect_print_dump_info (REPORT_DETAILS))
2636 fprintf (vect_dump, "bad loop form.");
2640 /* Find all data references in the loop (which correspond to vdefs/vuses)
2641 and analyze their evolution in the loop.
2643 FORNOW: Handle only simple, array references, which
2644 alignment can be forced, and aligned pointer-references. */
2646 ok = vect_analyze_data_refs (loop_vinfo);
2649 if (vect_print_dump_info (REPORT_DETAILS))
2650 fprintf (vect_dump, "bad data references.");
2651 destroy_loop_vec_info (loop_vinfo);
2655 /* Classify all cross-iteration scalar data-flow cycles.
2656 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2658 vect_analyze_scalar_cycles (loop_vinfo);
2660 vect_pattern_recog (loop_vinfo);
2662 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2664 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2667 if (vect_print_dump_info (REPORT_DETAILS))
2668 fprintf (vect_dump, "unexpected pattern.");
2669 destroy_loop_vec_info (loop_vinfo);
2673 /* Analyze the alignment of the data-refs in the loop.
2674 Fail if a data reference is found that cannot be vectorized. */
2676 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2679 if (vect_print_dump_info (REPORT_DETAILS))
2680 fprintf (vect_dump, "bad data alignment.");
2681 destroy_loop_vec_info (loop_vinfo);
2685 ok = vect_determine_vectorization_factor (loop_vinfo);
2688 if (vect_print_dump_info (REPORT_DETAILS))
2689 fprintf (vect_dump, "can't determine vectorization factor.");
2690 destroy_loop_vec_info (loop_vinfo);
2694 /* Analyze data dependences between the data-refs in the loop.
2695 FORNOW: fail at the first data dependence that we encounter. */
2697 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2700 if (vect_print_dump_info (REPORT_DETAILS))
2701 fprintf (vect_dump, "bad data dependence.");
2702 destroy_loop_vec_info (loop_vinfo);
2706 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2707 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2709 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2712 if (vect_print_dump_info (REPORT_DETAILS))
2713 fprintf (vect_dump, "bad data access.");
2714 destroy_loop_vec_info (loop_vinfo);
2718 /* This pass will decide on using loop versioning and/or loop peeling in
2719 order to enhance the alignment of data references in the loop. */
2721 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2724 if (vect_print_dump_info (REPORT_DETAILS))
2725 fprintf (vect_dump, "bad data alignment.");
2726 destroy_loop_vec_info (loop_vinfo);
2730 /* Scan all the operations in the loop and make sure they are
2733 ok = vect_analyze_operations (loop_vinfo);
2736 if (vect_print_dump_info (REPORT_DETAILS))
2737 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2738 destroy_loop_vec_info (loop_vinfo);
2742 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;