1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
/* Forward declarations for the top-level phases of the vectorizer's
   analysis: loop-form validation, data-reference discovery, scalar-cycle
   classification, access/dependence/alignment analysis, per-stmt
   operation checks, and VF determination.  */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
/* Helpers used by the phases above; see each definition for details.  */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
/* NOTE(review): this excerpt elides some original lines (embedded line
   numbers jump); comments below describe only the visible code.  */
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
/* Loop description and iteration state for walking every PHI and stmt
   in all basic blocks of the loop.  */
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
104 stmt_vec_info stmt_info;
107 if (vect_print_dump_info (REPORT_DETAILS))
108 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
110 for (i = 0; i < nbbs; i++)
112 basic_block bb = bbs[i];
/* Pass over the PHIs of this bb: each relevant PHI contributes the
   vector width of its result type to the VF.  */
114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
116 stmt_info = vinfo_for_stmt (phi);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining phi: ");
120 print_generic_expr (vect_dump, phi, TDF_SLIM);
123 gcc_assert (stmt_info);
125 if (STMT_VINFO_RELEVANT_P (stmt_info))
/* No earlier phase should have set a vectype for a PHI.  */
127 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
128 scalar_type = TREE_TYPE (PHI_RESULT (phi));
130 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "get vectype for scalar type: ");
133 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
136 vectype = get_vectype_for_scalar_type (scalar_type);
/* Failure to find a vector type for the scalar type makes the
   whole loop unvectorizable.  */
139 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 "not vectorized: unsupported data-type ");
143 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
147 STMT_VINFO_VECTYPE (stmt_info) = vectype;
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "vectype: ");
152 print_generic_expr (vect_dump, vectype, TDF_SLIM);
155 nunits = TYPE_VECTOR_SUBPARTS (vectype);
156 if (vect_print_dump_info (REPORT_DETAILS))
157 fprintf (vect_dump, "nunits = %d", nunits);
/* Keep the maximum subparts count seen so far as the VF.  */
159 if (!vectorization_factor
160 || (nunits > vectorization_factor))
161 vectorization_factor = nunits;
/* Pass over the ordinary stmts of this bb: derive the vectype from the
   type of the lhs, unless one was already set (datarefs/patterns).  */
165 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
167 tree stmt = bsi_stmt (si);
168 stmt_info = vinfo_for_stmt (stmt);
170 if (vect_print_dump_info (REPORT_DETAILS))
172 fprintf (vect_dump, "==> examining statement: ");
173 print_generic_expr (vect_dump, stmt, TDF_SLIM);
176 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
179 gcc_assert (stmt_info);
181 /* skip stmts which do not need to be vectorized. */
182 if (!STMT_VINFO_RELEVANT_P (stmt_info)
183 && !STMT_VINFO_LIVE_P (stmt_info))
185 if (vect_print_dump_info (REPORT_DETAILS))
186 fprintf (vect_dump, "skip.");
/* A stmt that already computes a vector value cannot be vectorized
   again -- reject the loop.  */
190 if (!GIMPLE_STMT_P (stmt)
191 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
193 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
195 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
196 print_generic_expr (vect_dump, stmt, TDF_SLIM);
201 if (STMT_VINFO_VECTYPE (stmt_info))
203 /* The only case when a vectype had been already set is for stmts
204 that contain a dataref, or for "pattern-stmts" (stmts generated
205 by the vectorizer to represent/replace a certain idiom). */
206 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
207 || is_pattern_stmt_p (stmt_info));
208 vectype = STMT_VINFO_VECTYPE (stmt_info);
212 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
213 && !is_pattern_stmt_p (stmt_info));
215 /* We set the vectype according to the type of the result (lhs).
216 For stmts whose result-type is different than the type of the
217 arguments (e.g. demotion, promotion), vectype will be reset
218 appropriately (later). Note that we have to visit the smallest
219 datatype in this function, because that determines the VF.
220 If the smallest datatype in the loop is present only as the
221 rhs of a promotion operation - we'd miss it here.
222 However, in such a case, that a variable of this datatype
223 does not appear in the lhs anywhere in the loop, it shouldn't
224 affect the vectorization factor. */
225 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
227 if (vect_print_dump_info (REPORT_DETAILS))
229 fprintf (vect_dump, "get vectype for scalar type: ");
230 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
233 vectype = get_vectype_for_scalar_type (scalar_type);
236 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
239 "not vectorized: unsupported data-type ");
240 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
244 STMT_VINFO_VECTYPE (stmt_info) = vectype;
247 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vectype: ");
250 print_generic_expr (vect_dump, vectype, TDF_SLIM);
253 nunits = TYPE_VECTOR_SUBPARTS (vectype);
254 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "nunits = %d", nunits);
257 if (!vectorization_factor
258 || (nunits > vectorization_factor))
259 vectorization_factor = nunits;
264 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* VF <= 1 means no stmt supplied a usable vector type.  */
266 if (vectorization_factor <= 1)
268 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
269 fprintf (vect_dump, "not vectorized: unsupported data-type");
272 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
278 /* Function vect_analyze_operations.
280 Scan the loop stmts and make sure they are all vectorizable. */
/* NOTE(review): this excerpt elides some original lines (embedded line
   numbers jump); comments below describe only the visible code.  */
283 vect_analyze_operations (loop_vec_info loop_vinfo)
285 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
286 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
287 int nbbs = loop->num_nodes;
288 block_stmt_iterator si;
289 unsigned int vectorization_factor = 0;
293 stmt_vec_info stmt_info;
/* Set when at least one stmt actually requires vectorization; a loop
   with none is rejected as "redundant" below.  */
294 bool need_to_vectorize = false;
296 if (vect_print_dump_info (REPORT_DETAILS))
297 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* The VF must have been computed by vect_determine_vectorization_factor.  */
299 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
300 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
302 for (i = 0; i < nbbs; i++)
304 basic_block bb = bbs[i];
/* Check every PHI: live-after-loop values and unsupported scalar
   cycles are rejected; induction PHIs go through
   vectorizable_induction.  */
306 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
310 stmt_info = vinfo_for_stmt (phi);
311 if (vect_print_dump_info (REPORT_DETAILS))
313 fprintf (vect_dump, "examining phi: ");
314 print_generic_expr (vect_dump, phi, TDF_SLIM);
317 gcc_assert (stmt_info);
319 if (STMT_VINFO_LIVE_P (stmt_info))
321 /* FORNOW: not yet supported. */
322 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
323 fprintf (vect_dump, "not vectorized: value used after loop.");
327 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
328 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
330 /* A scalar-dependence cycle that we don't support. */
331 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
332 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
336 if (STMT_VINFO_RELEVANT_P (stmt_info))
338 need_to_vectorize = true;
339 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
340 ok = vectorizable_induction (phi, NULL, NULL);
345 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
348 "not vectorized: relevant phi not supported: ");
349 print_generic_expr (vect_dump, phi, TDF_SLIM);
/* Check every ordinary stmt in the bb.  */
355 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
357 tree stmt = bsi_stmt (si);
358 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
359 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
361 if (vect_print_dump_info (REPORT_DETAILS))
363 fprintf (vect_dump, "==> examining statement: ");
364 print_generic_expr (vect_dump, stmt, TDF_SLIM);
367 gcc_assert (stmt_info);
369 /* skip stmts which do not need to be vectorized.
370 this is expected to include:
371 - the COND_EXPR which is the loop exit condition
372 - any LABEL_EXPRs in the loop
373 - computations that are used only for array indexing or loop
376 if (!STMT_VINFO_RELEVANT_P (stmt_info)
377 && !STMT_VINFO_LIVE_P (stmt_info))
379 if (vect_print_dump_info (REPORT_DETAILS))
380 fprintf (vect_dump, "irrelevant.");
/* Sanity-check the def-type/relevance combination.  */
384 switch (STMT_VINFO_DEF_TYPE (stmt_info))
389 case vect_reduction_def:
390 gcc_assert (relevance == vect_unused_in_loop);
393 case vect_induction_def:
394 case vect_constant_def:
395 case vect_invariant_def:
396 case vect_unknown_def_type:
401 if (STMT_VINFO_RELEVANT_P (stmt_info))
403 gcc_assert (GIMPLE_STMT_P (stmt)
404 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
405 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
406 need_to_vectorize = true;
/* Try every supported stmt kind in turn; OK if any handler accepts.  */
409 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
410 || vectorizable_type_demotion (stmt, NULL, NULL)
411 || vectorizable_conversion (stmt, NULL, NULL)
412 || vectorizable_operation (stmt, NULL, NULL)
413 || vectorizable_assignment (stmt, NULL, NULL)
414 || vectorizable_load (stmt, NULL, NULL)
415 || vectorizable_call (stmt, NULL, NULL)
416 || vectorizable_store (stmt, NULL, NULL)
417 || vectorizable_condition (stmt, NULL, NULL)
418 || vectorizable_reduction (stmt, NULL, NULL));
420 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
421 need extra handling, except for vectorizable reductions. */
422 if (STMT_VINFO_LIVE_P (stmt_info)
423 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
424 ok |= vectorizable_live_operation (stmt, NULL, NULL);
428 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
430 fprintf (vect_dump, "not vectorized: stmt not supported: ");
431 print_generic_expr (vect_dump, stmt, TDF_SLIM);
438 /* TODO: Analyze cost. Decide if worth while to vectorize. */
440 /* All operations in the loop are either irrelevant (deal with loop
441 control, or dead), or only used outside the loop and can be moved
442 out of the loop (e.g. invariants, inductions). The loop can be
443 optimized away by scalar optimizations. We're better off not
444 touching this loop. */
445 if (!need_to_vectorize)
447 if (vect_print_dump_info (REPORT_DETAILS))
449 "All the computation can be taken out of the loop.");
450 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
452 "not vectorized: redundant loop. no profit to vectorize.");
456 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
457 && vect_print_dump_info (REPORT_DETAILS))
459 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
460 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* A known iteration count must clear both the VF and the
   min-vect-loop-bound param times the VF; otherwise not profitable.  */
462 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
463 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
464 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
465 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
466 * vectorization_factor))))
468 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
469 fprintf (vect_dump, "not vectorized: iteration count too small.");
/* An epilog loop is needed when the niters are unknown, not a multiple
   of the VF, or when peeling for alignment was requested.  */
473 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
474 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
475 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
477 if (vect_print_dump_info (REPORT_DETAILS))
478 fprintf (vect_dump, "epilog loop required.");
479 if (!vect_can_advance_ivs_p (loop_vinfo))
481 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
483 "not vectorized: can't create epilog loop 1.");
486 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
488 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
490 "not vectorized: can't create epilog loop 2.");
499 /* Function exist_non_indexing_operands_for_use_p
501 USE is one of the uses attached to STMT. Check if USE is
502 used in STMT for anything other than indexing an array. */
/* NOTE(review): the return statements of this function are elided in
   this excerpt; only the tests shown below are visible.  */
505 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
508 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
510 /* USE corresponds to some operand in STMT. If there is no data
511 reference in STMT, then any operand that corresponds to USE
512 is not indexing an array. */
513 if (!STMT_VINFO_DATA_REF (stmt_info))
516 /* STMT has a data_ref. FORNOW this means that its of one of
520 (This should have been verified in analyze_data_refs).
522 'var' in the second case corresponds to a def, not a use,
523 so USE cannot correspond to any operands that are not used
526 Therefore, all we need to check is if STMT falls into the
527 first case, and whether var corresponds to USE. */
/* A load has an SSA_NAME on the lhs; then inspect the rhs operand.  */
529 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
532 operand = GIMPLE_STMT_OPERAND (stmt, 1);
534 if (TREE_CODE (operand) != SSA_NAME)
544 /* Function vect_analyze_scalar_cycles.
546 Examine the cross iteration def-use cycles of scalar variables, by
547 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
548 following: invariant, induction, reduction, unknown.
550 Some forms of scalar cycles are not yet supported.
552 Example1: reduction: (unsupported yet)
558 Example2: induction: (unsupported yet)
564 Note: the following loop *is* vectorizable:
570 even though it has a def-use cycle caused by the induction variable i:
572 loop: i_2 = PHI (i_0, i_1)
577 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
578 it does not need to be vectorized because it is only used for array
579 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
580 loop2 on the other hand is relevant (it is being written to memory).
/* NOTE(review): this excerpt elides some original lines (embedded line
   numbers jump); comments below describe only the visible code.  */
584 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
587 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
588 basic_block bb = loop->header;
/* PHIs that are not simple inductions are queued here and re-examined
   as reduction candidates in the second pass.  */
590 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
592 if (vect_print_dump_info (REPORT_DETAILS))
593 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
595 /* First - identify all inductions. */
596 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
598 tree access_fn = NULL;
599 tree def = PHI_RESULT (phi);
600 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
602 if (vect_print_dump_info (REPORT_DETAILS))
604 fprintf (vect_dump, "Analyze phi: ");
605 print_generic_expr (vect_dump, phi, TDF_SLIM);
608 /* Skip virtual phi's. The data dependences that are associated with
609 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
610 if (!is_gimple_reg (SSA_NAME_VAR (def)))
/* Default until classified below or in the reduction pass.  */
613 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
615 /* Analyze the evolution function. */
616 access_fn = analyze_scalar_evolution (loop, def);
617 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "Access function of PHI: ");
620 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* Not a simple IV evolution: defer this PHI to the reduction pass.  */
624 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
626 VEC_safe_push (tree, heap, worklist, phi);
630 if (vect_print_dump_info (REPORT_DETAILS))
631 fprintf (vect_dump, "Detected induction.");
632 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
636 /* Second - identify all reductions. */
637 while (VEC_length (tree, worklist) > 0)
639 tree phi = VEC_pop (tree, worklist);
640 tree def = PHI_RESULT (phi);
641 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
644 if (vect_print_dump_info (REPORT_DETAILS))
646 fprintf (vect_dump, "Analyze phi: ");
647 print_generic_expr (vect_dump, phi, TDF_SLIM);
/* Only real (non-virtual), still-unclassified PHIs reach here.  */
650 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
651 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
653 reduc_stmt = vect_is_simple_reduction (loop, phi);
656 if (vect_print_dump_info (REPORT_DETAILS))
657 fprintf (vect_dump, "Detected reduction.");
/* Mark both the PHI and the reduction stmt it feeds.  */
658 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
659 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
663 if (vect_print_dump_info (REPORT_DETAILS))
664 fprintf (vect_dump, "Unknown def-use cycle pattern.");
667 VEC_free (tree, heap, worklist);
672 /* Function vect_insert_into_interleaving_chain.
674 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
/* The chain is a singly linked list of stmts ordered by ascending
   DR_INIT, threaded through DR_GROUP_NEXT_DR and anchored at
   DR_GROUP_FIRST_DR of DRB's chain.  */
677 vect_insert_into_interleaving_chain (struct data_reference *dra,
678 struct data_reference *drb)
680 tree prev, next, next_init;
681 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
682 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
/* Start the walk at the head of DRB's chain.  */
684 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
685 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
688 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
/* Found the first element with a larger init: splice DRA in before it.  */
689 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
692 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
693 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
697 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
700 /* We got to the end of the list. Insert here. */
701 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
702 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
706 /* Function vect_update_interleaving_chain.
708 For two data-refs DRA and DRB that are a part of a chain interleaved data
709 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
711 There are four possible cases:
712 1. New stmts - both DRA and DRB are not a part of any chain:
715 2. DRB is a part of a chain and DRA is not:
716 no need to update FIRST_DR
717 no need to insert DRB
718 insert DRA according to init
719 3. DRA is a part of a chain and DRB is not:
720 if (init of FIRST_DR > init of DRB)
722 NEXT(FIRST_DR) = previous FIRST_DR
724 insert DRB according to its init
725 4. both DRA and DRB are in some interleaving chains:
726 choose the chain with the smallest init of FIRST_DR
727 insert the nodes of the second chain into the first one. */
/* NOTE(review): this excerpt elides some original lines (embedded line
   numbers jump); comments below describe only the visible code.  */
730 vect_update_interleaving_chain (struct data_reference *drb,
731 struct data_reference *dra)
733 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
734 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
735 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
736 tree node, prev, next, node_init, first_stmt;
738 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
739 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
/* DRB (the smaller init) becomes the head of a new two-element chain.  */
741 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
742 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
743 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
747 /* 2. DRB is a part of a chain and DRA is not. */
748 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
750 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
751 /* Insert DRA into the chain of DRB. */
752 vect_insert_into_interleaving_chain (dra, drb);
756 /* 3. DRA is a part of a chain and DRB is not. */
757 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
759 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
760 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
764 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
766 /* DRB's init is smaller than the init of the stmt previously marked
767 as the first stmt of the interleaving chain of DRA. Therefore, we
768 update FIRST_STMT and put DRB in the head of the list. */
769 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
770 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
772 /* Update all the stmts in the list to point to the new FIRST_STMT. */
773 tmp = old_first_stmt;
776 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
777 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
782 /* Insert DRB in the list of DRA. */
783 vect_insert_into_interleaving_chain (drb, dra);
784 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
789 /* 4. both DRA and DRB are in some interleaving chains. */
790 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
791 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
/* Already in the same chain: nothing to merge.  */
792 if (first_a == first_b)
794 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
795 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
/* Merge into whichever chain has the smaller head init.  */
797 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
799 /* Insert the nodes of DRA chain into the DRB chain.
800 After inserting a node, continue from this node of the DRB chain (don't
801 start from the beginning. */
802 node = DR_GROUP_FIRST_DR (stmtinfo_a);
803 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
804 first_stmt = first_b;
808 /* Insert the nodes of DRB chain into the DRA chain.
809 After inserting a node, continue from this node of the DRA chain (don't
810 start from the beginning. */
811 node = DR_GROUP_FIRST_DR (stmtinfo_b);
812 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
813 first_stmt = first_a;
/* Sorted-merge loop: place each NODE before the first element whose
   init exceeds NODE's, resuming the scan from the insertion point.  */
818 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
819 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
822 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
823 if (tree_int_cst_compare (next_init, node_init) > 0)
826 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
827 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
832 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
836 /* We got to the end of the list. Insert here. */
837 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
838 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
/* Re-anchor the moved node to the merged chain's head and advance.  */
841 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
842 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
847 /* Function vect_equal_offsets.
849 Check if OFFSET1 and OFFSET2 are identical expressions. */
/* Structural comparison: strips no-op conversions, then compares
   pointer-equal nodes directly, and recurses into matching binary
   expressions operand-by-operand.  */
852 vect_equal_offsets (tree offset1, tree offset2)
856 STRIP_NOPS (offset1);
857 STRIP_NOPS (offset2);
/* Same tree node => identical by definition.  */
859 if (offset1 == offset2)
/* Different codes, or not binary expressions => cannot recurse.  */
862 if (TREE_CODE (offset1) != TREE_CODE (offset2)
863 || !BINARY_CLASS_P (offset1)
864 || !BINARY_CLASS_P (offset2))
867 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
868 TREE_OPERAND (offset2, 0));
869 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
870 TREE_OPERAND (offset2, 1));
872 return (res0 && res1);
876 /* Function vect_check_interleaving.
878 Check if DRA and DRB are a part of interleaving. In case they are, insert
879 DRA and DRB in an interleaving chain. */
/* NOTE(review): this excerpt elides some original lines (embedded line
   numbers jump); comments below describe only the visible code.  */
882 vect_check_interleaving (struct data_reference *dra,
883 struct data_reference *drb)
885 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
887 /* Check that the data-refs have same first location (except init) and they
888 are both either store or load (not load and store). */
889 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
890 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
891 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
892 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
893 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
894 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
895 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
896 || DR_IS_READ (dra) != DR_IS_READ (drb))
/* Interleaving requires:
900 1. data-refs are of the same type
901 2. their steps are equal
902 3. the step is greater than the difference between data-refs' inits */
903 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
904 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
906 if (type_size_a != type_size_b
907 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
910 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
911 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
912 step = TREE_INT_CST_LOW (DR_STEP (dra));
916 /* If init_a == init_b + the size of the type * k, we have an interleaving,
917 and DRB is accessed before DRA. */
918 diff_mod_size = (init_a - init_b) % type_size_a;
/* Gap larger than the step: the refs cannot interleave.  */
920 if ((init_a - init_b) > step)
923 if (diff_mod_size == 0)
925 vect_update_interleaving_chain (drb, dra);
926 if (vect_print_dump_info (REPORT_DR_DETAILS))
928 fprintf (vect_dump, "Detected interleaving ");
929 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
930 fprintf (vect_dump, " and ");
931 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
938 /* If init_b == init_a + the size of the type * k, we have an
939 interleaving, and DRA is accessed before DRB. */
940 diff_mod_size = (init_b - init_a) % type_size_a;
942 if ((init_b - init_a) > step)
945 if (diff_mod_size == 0)
947 vect_update_interleaving_chain (dra, drb);
948 if (vect_print_dump_info (REPORT_DR_DETAILS))
950 fprintf (vect_dump, "Detected interleaving ");
951 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
952 fprintf (vect_dump, " and ");
953 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
961 /* Function vect_analyze_data_ref_dependence.
963 Return TRUE if there (might) exist a dependence between a memory-reference
964 DRA and a memory-reference DRB. */
/* NOTE(review): this excerpt elides some original lines (embedded line
   numbers jump); comments below describe only the visible code.  */
967 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
968 loop_vec_info loop_vinfo)
971 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
972 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
973 struct data_reference *dra = DDR_A (ddr);
974 struct data_reference *drb = DDR_B (ddr);
975 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
976 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
977 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
978 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
979 lambda_vector dist_v;
980 unsigned int loop_depth;
982 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
984 /* Independent data accesses. */
/* Even independent refs may form an interleaving group.  */
985 vect_check_interleaving (dra, drb);
/* Read-after-read (or a ref with itself) is never a dependence.  */
989 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
/* Dependence analysis could not decide: must conservatively reject.  */
992 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
994 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
997 "not vectorized: can't determine dependence between ");
998 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
999 fprintf (vect_dump, " and ");
1000 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1005 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1007 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1009 fprintf (vect_dump, "not vectorized: bad dist vector for ");
1010 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1011 fprintf (vect_dump, " and ");
1012 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Examine the distance in this loop's dimension for each vector.  */
1017 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1018 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1020 int dist = dist_v[loop_depth];
1022 if (vect_print_dump_info (REPORT_DR_DETAILS))
1023 fprintf (vect_dump, "dependence distance = %d.", dist);
1025 /* Same loop iteration. */
1026 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1028 /* Two references with distance zero have the same alignment. */
1029 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1030 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1031 if (vect_print_dump_info (REPORT_ALIGNMENT))
1032 fprintf (vect_dump, "accesses have the same alignment.");
1033 if (vect_print_dump_info (REPORT_DR_DETAILS))
1035 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1036 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1037 fprintf (vect_dump, " and ");
1038 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1041 /* For interleaving, mark that there is a read-write dependency if
1042 necessary. We check before that one of the data-refs is store. */
1043 if (DR_IS_READ (dra))
1044 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1047 if (DR_IS_READ (drb))
1048 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
/* A distance of at least VF iterations is harmless for vectorization.  */
1054 if (abs (dist) >= vectorization_factor)
1056 /* Dependence distance does not create dependence, as far as vectorization
1057 is concerned, in this case. */
1058 if (vect_print_dump_info (REPORT_DR_DETAILS))
1059 fprintf (vect_dump, "dependence distance >= VF.");
1063 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1066 "not vectorized: possible dependence between data-refs ");
1067 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1068 fprintf (vect_dump, " and ");
1069 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1079 /* Function vect_analyze_data_ref_dependences.
1081 Examine all the data references in the loop, and make sure there do not
1082 exist any data dependences between them. */
/* Driver: applies vect_analyze_data_ref_dependence to every
   dependence relation recorded for the loop.  */
1085 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1088 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1089 struct data_dependence_relation *ddr;
1091 if (vect_print_dump_info (REPORT_DETAILS))
1092 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1094 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1095 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1102 /* Function vect_compute_data_ref_alignment
1104 Compute the misalignment of the data reference DR.
1107 1. If during the misalignment computation it is found that the data reference
1108 cannot be vectorized then false is returned.
1109 2. DR_MISALIGNMENT (DR) is defined.
1111 FOR NOW: No analysis is actually performed. Misalignment is calculated
1112 only for trivial cases. TODO. */
/* NOTE(review): this excerpt elides some original lines (embedded line
   numbers jump); comments below describe only the visible code.  */
1115 vect_compute_data_ref_alignment (struct data_reference *dr)
1117 tree stmt = DR_STMT (dr);
1118 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1119 tree ref = DR_REF (dr);
1121 tree base, base_addr;
1124 tree aligned_to, alignment;
1126 if (vect_print_dump_info (REPORT_DETAILS))
1127 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1129 /* Initialize misalignment to unknown. */
1130 DR_MISALIGNMENT (dr) = -1;
1132 misalign = DR_OFFSET_MISALIGNMENT (dr);
1133 aligned_to = DR_ALIGNED_TO (dr);
1134 base_addr = DR_BASE_ADDRESS (dr);
1135 base = build_fold_indirect_ref (base_addr);
1136 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment = the vector type's alignment, in bytes.  */
1137 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
/* Known guaranteed alignment is weaker than what the vectype needs:
   leave DR_MISALIGNMENT at -1 (unknown).  */
1139 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1142 if (vect_print_dump_info (REPORT_DETAILS))
1144 fprintf (vect_dump, "Unknown alignment for access: ");
1145 print_generic_expr (vect_dump, base, TDF_SLIM);
/* Decide whether the base object is already sufficiently aligned,
   either via its DECL alignment or via the pointed-to type.  */
1151 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1153 || (TREE_CODE (base_addr) == SSA_NAME
1154 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1155 TREE_TYPE (base_addr)))),
1157 base_aligned = true;
1159 base_aligned = false;
1163 /* Do not change the alignment of global variables if
1164 flag_section_anchors is enabled. */
1165 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1166 || (TREE_STATIC (base) && flag_section_anchors))
1168 if (vect_print_dump_info (REPORT_DETAILS))
1170 fprintf (vect_dump, "can't force alignment of ref: ");
1171 print_generic_expr (vect_dump, ref, TDF_SLIM);
1176 /* Force the alignment of the decl.
1177 NOTE: This is the only change to the code we make during
1178 the analysis phase, before deciding to vectorize the loop. */
1179 if (vect_print_dump_info (REPORT_DETAILS))
1180 fprintf (vect_dump, "force alignment");
1181 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1182 DECL_USER_ALIGN (base) = 1;
1185 /* At this point we assume that the base is aligned. */
1186 gcc_assert (base_aligned
1187 || (TREE_CODE (base) == VAR_DECL
1188 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1190 /* Modulo alignment. */
1191 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1193 if (!host_integerp (misalign, 1))
1195 /* Negative or overflowed misalignment value. */
1196 if (vect_print_dump_info (REPORT_DETAILS))
1197 fprintf (vect_dump, "unexpected misalign value");
/* Record the byte misalignment for later alignment decisions.  */
1201 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1203 if (vect_print_dump_info (REPORT_DETAILS))
1205 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1206 print_generic_expr (vect_dump, ref, TDF_SLIM);
1213 /* Function vect_compute_data_refs_alignment
1215 Compute the misalignment of data references in the loop.
1216 Return FALSE if a data reference is found that cannot be vectorized. */
1219 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
/* Simply apply vect_compute_data_ref_alignment to every data reference
   recorded for the loop; fail as soon as one cannot be handled.  */
1221 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1222 struct data_reference *dr;
1225 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1226 if (!vect_compute_data_ref_alignment (dr))
1233 /* Function vect_update_misalignment_for_peel
1235 DR - the data reference whose misalignment is to be adjusted.
1236 DR_PEEL - the data reference whose misalignment is being made
1237 zero in the vector loop by the peel.
1238 NPEEL - the number of iterations in the peel loop if the misalignment
1239 of DR_PEEL is known at compile time. */
1242 vect_update_misalignment_for_peel (struct data_reference *dr,
1243 struct data_reference *dr_peel, int npeel)
1246 VEC(dr_p,heap) *same_align_drs;
1247 struct data_reference *current_dr;
/* Element sizes of DR and DR_PEEL, in bytes.  */
1248 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1249 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1250 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1251 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1253 /* For interleaved data accesses the step in the loop must be multiplied by
1254 the size of the interleaving group. */
1255 if (DR_GROUP_FIRST_DR (stmt_info))
1256 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1257 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1258 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
/* If DR has the same misalignment (in element units) as DR_PEEL, peeling
   that zeroes DR_PEEL's misalignment zeroes DR's as well.  */
1260 if (known_alignment_for_access_p (dr)
1261 && known_alignment_for_access_p (dr_peel)
1262 && (DR_MISALIGNMENT (dr) / dr_size ==
1263 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1265 DR_MISALIGNMENT (dr) = 0;
1269 /* It can be assumed that the data refs with the same alignment as dr_peel
1270 are aligned in the vector loop. */
1272 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1273 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1275 if (current_dr != dr)
1277 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1278 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1279 DR_MISALIGNMENT (dr) = 0;
/* Both misalignments known at compile time: advance DR's misalignment by
   NPEEL iterations' worth of bytes, modulo the vector word size.  */
1283 if (known_alignment_for_access_p (dr)
1284 && known_alignment_for_access_p (dr_peel))
1286 DR_MISALIGNMENT (dr) += npeel * dr_size;
1287 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
/* Otherwise DR's misalignment after the peel cannot be determined.  */
1291 if (vect_print_dump_info (REPORT_DETAILS))
1292 fprintf (vect_dump, "Setting misalignment to -1.");
1293 DR_MISALIGNMENT (dr) = -1;
1297 /* Function vect_verify_datarefs_alignment
1299 Return TRUE if all data references in the loop can be
1300 handled with respect to alignment. */
1303 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1305 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1306 struct data_reference *dr;
1307 enum dr_alignment_support supportable_dr_alignment;
1310 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1312 tree stmt = DR_STMT (dr);
1313 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1315 /* For interleaving, only the alignment of the first access matters. */
1316 if (DR_GROUP_FIRST_DR (stmt_info)
1317 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Ask the target whether this (possibly misaligned) access can be
   handled; a zero answer is fatal for vectorization.  */
1320 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1321 if (!supportable_dr_alignment)
1323 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1325 if (DR_IS_READ (dr))
1327 "not vectorized: unsupported unaligned load.");
1330 "not vectorized: unsupported unaligned store.");
/* Supported but not naturally aligned: report that an unaligned
   access will be generated.  */
1334 if (supportable_dr_alignment != dr_aligned
1335 && vect_print_dump_info (REPORT_ALIGNMENT))
1336 fprintf (vect_dump, "Vectorizing an unaligned access.");
1342 /* Function vect_enhance_data_refs_alignment
1344 This pass will use loop versioning and loop peeling in order to enhance
1345 the alignment of data references in the loop.
1347 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1348 original loop is to be vectorized; Any other loops that are created by
1349 the transformations performed in this pass - are not supposed to be
1350 vectorized. This restriction will be relaxed.
1352 This pass will require a cost model to guide it whether to apply peeling
1353 or versioning or a combination of the two. For example, the scheme that
1354 intel uses when given a loop with several memory accesses, is as follows:
1355 choose one memory access ('p') which alignment you want to force by doing
1356 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1357 other accesses are not necessarily aligned, or (2) use loop versioning to
1358 generate one loop in which all accesses are aligned, and another loop in
1359 which only 'p' is necessarily aligned.
1361 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1362 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1363 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1365 Devising a cost model is the most critical aspect of this work. It will
1366 guide us on which access to peel for, whether to use loop versioning, how
1367 many versions to create, etc. The cost model will probably consist of
1368 generic considerations as well as target specific considerations (on
1369 powerpc for example, misaligned stores are more painful than misaligned
1372 Here are the general steps involved in alignment enhancements:
1374 -- original loop, before alignment analysis:
1375 for (i=0; i<N; i++){
1376 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1377 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1380 -- After vect_compute_data_refs_alignment:
1381 for (i=0; i<N; i++){
1382 x = q[i]; # DR_MISALIGNMENT(q) = 3
1383 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1386 -- Possibility 1: we do loop versioning:
1388 for (i=0; i<N; i++){ # loop 1A
1389 x = q[i]; # DR_MISALIGNMENT(q) = 3
1390 p[i] = y; # DR_MISALIGNMENT(p) = 0
1394 for (i=0; i<N; i++){ # loop 1B
1395 x = q[i]; # DR_MISALIGNMENT(q) = 3
1396 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1400 -- Possibility 2: we do loop peeling:
1401 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1405 for (i = 3; i < N; i++){ # loop 2A
1406 x = q[i]; # DR_MISALIGNMENT(q) = 0
1407 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1410 -- Possibility 3: combination of loop peeling and versioning:
1411 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1416 for (i = 3; i<N; i++){ # loop 3A
1417 x = q[i]; # DR_MISALIGNMENT(q) = 0
1418 p[i] = y; # DR_MISALIGNMENT(p) = 0
1422 for (i = 3; i<N; i++){ # loop 3B
1423 x = q[i]; # DR_MISALIGNMENT(q) = 0
1424 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1428 These loops are later passed to loop_transform to be vectorized. The
1429 vectorizer will use the alignment information to guide the transformation
1430 (whether to generate regular loads/stores, or with special handling for
1434 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1436 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1437 enum dr_alignment_support supportable_dr_alignment;
/* DR0 is the data reference chosen as the peeling candidate, if any.  */
1438 struct data_reference *dr0 = NULL;
1439 struct data_reference *dr;
1441 bool do_peeling = false;
1442 bool do_versioning = false;
1445 stmt_vec_info stmt_info;
1447 if (vect_print_dump_info (REPORT_DETAILS))
1448 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1450 /* While cost model enhancements are expected in the future, the high level
1451 view of the code at this time is as follows:
1453 A) If there is a misaligned write then see if peeling to align this write
1454 can make all data references satisfy vect_supportable_dr_alignment.
1455 If so, update data structures as needed and return true. Note that
1456 at this time vect_supportable_dr_alignment is known to return false
1457 for a misaligned write.
1459 B) If peeling wasn't possible and there is a data reference with an
1460 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1461 then see if loop versioning checks can be used to make all data
1462 references satisfy vect_supportable_dr_alignment. If so, update
1463 data structures as needed and return true.
1465 C) If neither peeling nor versioning were successful then return false if
1466 any data reference does not satisfy vect_supportable_dr_alignment.
1468 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1470 Note, Possibility 3 above (which is peeling and versioning together) is not
1471 being done at this time. */
1473 /* (1) Peeling to force alignment. */
1475 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1477 + How many accesses will become aligned due to the peeling
1478 - How many accesses will become unaligned due to the peeling,
1479 and the cost of misaligned accesses.
1480 - The cost of peeling (the extra runtime checks, the increase
1483 The scheme we use FORNOW: peel to force the alignment of the first
1484 misaligned store in the loop.
1485 Rationale: misaligned stores are not yet supported.
1487 TODO: Use a cost model. */
1489 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1491 stmt = DR_STMT (dr);
1492 stmt_info = vinfo_for_stmt (stmt);
1494 /* For interleaving, only the alignment of the first access
1496 if (DR_GROUP_FIRST_DR (stmt_info)
1497 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Candidate: a misaligned store (misaligned stores are the accesses
   that peeling is targeted at, per the FORNOW scheme above).  */
1500 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1502 if (DR_GROUP_FIRST_DR (stmt_info))
1504 /* For interleaved access we peel only if number of iterations in
1505 the prolog loop ({VF - misalignment}), is a multiple of the
1506 number of the interleaved accesses. */
1507 int elem_size, mis_in_elements;
1508 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1510 /* FORNOW: handle only known alignment. */
1511 if (!known_alignment_for_access_p (dr))
1517 elem_size = UNITS_PER_SIMD_WORD / vf;
1518 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1520 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1532 /* Often peeling for alignment will require peeling for loop-bound, which in
1533 turn requires that we know how to adjust the loop ivs after the loop. */
1534 if (!vect_can_advance_ivs_p (loop_vinfo))
1542 if (known_alignment_for_access_p (dr0))
1544 /* Since it's known at compile time, compute the number of iterations
1545 in the peeled loop (the peeling factor) for use in updating
1546 DR_MISALIGNMENT values. The peeling factor is the vectorization
1547 factor minus the misalignment as an element count. */
1548 mis = DR_MISALIGNMENT (dr0);
1549 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1550 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1552 /* For interleaved data access every iteration accesses all the
1553 members of the group, therefore we divide the number of iterations
1554 by the group size. */
1555 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1556 if (DR_GROUP_FIRST_DR (stmt_info))
1557 npeel /= DR_GROUP_SIZE (stmt_info);
1559 if (vect_print_dump_info (REPORT_DETAILS))
1560 fprintf (vect_dump, "Try peeling by %d", npeel);
1563 /* Ensure that all data refs can be vectorized after the peel. */
1564 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1566 int save_misalignment;
1571 stmt = DR_STMT (dr);
1572 stmt_info = vinfo_for_stmt (stmt);
1573 /* For interleaving, only the alignment of the first access
1575 if (DR_GROUP_FIRST_DR (stmt_info)
1576 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Trial update: temporarily apply the peel's effect on DR's
   misalignment, query supportability, then restore.  */
1579 save_misalignment = DR_MISALIGNMENT (dr);
1580 vect_update_misalignment_for_peel (dr, dr0, npeel);
1581 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1582 DR_MISALIGNMENT (dr) = save_misalignment;
1584 if (!supportable_dr_alignment)
1593 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1594 If the misalignment of DR_i is identical to that of dr0 then set
1595 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1596 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1597 by the peeling factor times the element size of DR_i (MOD the
1598 vectorization factor times the size). Otherwise, the
1599 misalignment of DR_i must be set to unknown. */
1600 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1602 vect_update_misalignment_for_peel (dr, dr0, npeel);
1604 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1605 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1606 DR_MISALIGNMENT (dr0) = 0;
1607 if (vect_print_dump_info (REPORT_ALIGNMENT))
1608 fprintf (vect_dump, "Alignment of access forced using peeling.");
1610 if (vect_print_dump_info (REPORT_DETAILS))
1611 fprintf (vect_dump, "Peeling for alignment will be applied.");
1613 stat = vect_verify_datarefs_alignment (loop_vinfo);
1620 /* (2) Versioning to force alignment. */
1622 /* Try versioning if:
1623 1) flag_tree_vect_loop_version is TRUE
1624 2) optimize_size is FALSE
1625 3) there is at least one unsupported misaligned data ref with an unknown
1627 4) all misaligned data refs with a known misalignment are supported, and
1628 5) the number of runtime alignment checks is within reason. */
1630 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1634 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1636 stmt = DR_STMT (dr);
1637 stmt_info = vinfo_for_stmt (stmt);
1639 /* For interleaving, only the alignment of the first access
1641 if (aligned_access_p (dr)
1642 || (DR_GROUP_FIRST_DR (stmt_info)
1643 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1646 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1648 if (!supportable_dr_alignment)
/* Versioning cannot help a known (compile-time) misalignment, and
   is abandoned when the runtime-check budget is exceeded.  */
1654 if (known_alignment_for_access_p (dr)
1655 || VEC_length (tree,
1656 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1657 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1659 do_versioning = false;
1663 stmt = DR_STMT (dr);
1664 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1665 gcc_assert (vectype);
1667 /* The rightmost bits of an aligned address must be zeros.
1668 Construct the mask needed for this test. For example,
1669 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1670 mask must be 15 = 0xf. */
1671 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1673 /* FORNOW: use the same mask to test all potentially unaligned
1674 references in the loop. The vectorizer currently supports
1675 a single vector size, see the reference to
1676 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1677 vectorization factor is computed. */
1678 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1679 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1680 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1681 VEC_safe_push (tree, heap,
1682 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1687 /* Versioning requires at least one misaligned data reference. */
1688 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1689 do_versioning = false;
1690 else if (!do_versioning)
1691 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1696 VEC(tree,heap) *may_misalign_stmts
1697 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1700 /* It can now be assumed that the data references in the statements
1701 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1702 of the loop being vectorized. */
1703 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1705 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1706 dr = STMT_VINFO_DATA_REF (stmt_info);
1707 DR_MISALIGNMENT (dr) = 0;
1708 if (vect_print_dump_info (REPORT_ALIGNMENT))
1709 fprintf (vect_dump, "Alignment of access forced using versioning.");
1712 if (vect_print_dump_info (REPORT_DETAILS))
1713 fprintf (vect_dump, "Versioning for alignment will be applied.");
1715 /* Peeling and versioning can't be done together at this time. */
1716 gcc_assert (! (do_peeling && do_versioning));
1718 stat = vect_verify_datarefs_alignment (loop_vinfo);
1723 /* This point is reached if neither peeling nor versioning is being done. */
1724 gcc_assert (! (do_peeling || do_versioning));
1726 stat = vect_verify_datarefs_alignment (loop_vinfo);
1731 /* Function vect_analyze_data_refs_alignment
1733 Analyze the alignment of the data-references in the loop.
1734 Return FALSE if a data reference is found that cannot be vectorized. */
1737 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1739 if (vect_print_dump_info (REPORT_DETAILS))
1740 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
/* Delegate the per-reference misalignment computation; failure here means
   some reference's alignment could not be analyzed at all.  */
1742 if (!vect_compute_data_refs_alignment (loop_vinfo))
1744 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1746 "not vectorized: can't calculate alignment for data ref.");
1754 /* Function vect_analyze_data_ref_access.
1756 Analyze the access pattern of the data-reference DR. For now, a data access
1757 has to be consecutive to be considered vectorizable. */
1760 vect_analyze_data_ref_access (struct data_reference *dr)
1762 tree step = DR_STEP (dr);
1763 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1764 tree scalar_type = TREE_TYPE (DR_REF (dr));
1765 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1766 tree stmt = DR_STMT (dr);
1767 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1768 interleaving group (including gaps). */
1769 HOST_WIDE_INT stride = dr_step / type_size;
1773 if (vect_print_dump_info (REPORT_DETAILS))
1774 fprintf (vect_dump, "bad data-ref access");
/* Consecutive access: STEP equals the scalar type size
   (tree_int_cst_compare returns 0 on equality).  */
1779 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1781 /* Mark that it is not interleaving. */
1782 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1786 /* Not consecutive access is possible only if it is a part of interleaving. */
1787 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1789 /* Check if it this DR is a part of interleaving, and is a single
1790 element of the group that is accessed in the loop. */
1792 /* Gaps are supported only for loads. STEP must be a multiple of the type
1793 size. The size of the group must be a power of 2. */
1795 && (dr_step % type_size) == 0
1797 && exact_log2 (stride) != -1)
/* Single-element interleaving: this stmt heads its own group whose
   size is the stride in elements.  */
1799 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1800 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1801 if (vect_print_dump_info (REPORT_DR_DETAILS))
1803 fprintf (vect_dump, "Detected single element interleaving %d ",
1804 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1805 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1806 fprintf (vect_dump, " step ");
1807 print_generic_expr (vect_dump, step, TDF_SLIM);
1811 if (vect_print_dump_info (REPORT_DETAILS))
1812 fprintf (vect_dump, "not consecutive access");
1816 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1818 /* First stmt in the interleaving chain. Check the chain. */
1819 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1820 struct data_reference *data_ref = dr;
1821 unsigned int count = 1;
1823 tree prev_init = DR_INIT (data_ref);
1825 HOST_WIDE_INT diff, count_in_bytes;
1829 /* Skip same data-refs. In case that two or more stmts share data-ref
1830 (supported only for loads), we vectorize only the first stmt, and
1831 the rest get their vectorized loads from the first one. */
1832 if (!tree_int_cst_compare (DR_INIT (data_ref),
1833 DR_INIT (STMT_VINFO_DATA_REF (
1834 vinfo_for_stmt (next)))))
1836 if (!DR_IS_READ (data_ref))
1838 if (vect_print_dump_info (REPORT_DETAILS))
1839 fprintf (vect_dump, "Two store stmts share the same dr.");
1843 /* Check that there is no load-store dependencies for this loads
1844 to prevent a case of load-store-load to the same location. */
1845 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1846 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1848 if (vect_print_dump_info (REPORT_DETAILS))
1850 "READ_WRITE dependence in interleaving.");
1854 /* For load use the same data-ref load. */
1855 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1858 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1863 /* Check that all the accesses have the same STEP. */
1864 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1865 if (tree_int_cst_compare (step, next_step))
1867 if (vect_print_dump_info (REPORT_DETAILS))
1868 fprintf (vect_dump, "not consecutive access in interleaving");
1872 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1873 /* Check that the distance between two accesses is equal to the type
1874 size. Otherwise, we have gaps. */
1875 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1876 - TREE_INT_CST_LOW (prev_init)) / type_size;
1877 if (!DR_IS_READ (data_ref) && diff != 1)
1879 if (vect_print_dump_info (REPORT_DETAILS))
1880 fprintf (vect_dump, "interleaved store with gaps");
1883 /* Store the gap from the previous member of the group. If there is no
1884 gap in the access, DR_GROUP_GAP is always 1. */
1885 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1887 prev_init = DR_INIT (data_ref);
1888 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1889 /* Count the number of data-refs in the chain. */
1893 /* COUNT is the number of accesses found, we multiply it by the size of
1894 the type to get COUNT_IN_BYTES. */
1895 count_in_bytes = type_size * count;
1897 /* Check that the size of the interleaving is not greater than STEP. */
1898 if (dr_step < count_in_bytes)
1900 if (vect_print_dump_info (REPORT_DETAILS))
1902 fprintf (vect_dump, "interleaving size is greater than step for ");
1903 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1908 /* Check that the size of the interleaving is equal to STEP for stores,
1909 i.e., that there are no gaps. */
1910 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1912 if (vect_print_dump_info (REPORT_DETAILS))
1913 fprintf (vect_dump, "interleaved store with gaps");
1917 /* Check that STEP is a multiple of type size. */
1918 if ((dr_step % type_size) != 0)
1920 if (vect_print_dump_info (REPORT_DETAILS))
1922 fprintf (vect_dump, "step is not a multiple of type size: step ");
1923 print_generic_expr (vect_dump, step, TDF_SLIM);
1924 fprintf (vect_dump, " size ");
1925 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1931 /* FORNOW: we handle only interleaving that is a power of 2. */
1932 if (exact_log2 (stride) == -1)
1934 if (vect_print_dump_info (REPORT_DETAILS))
1935 fprintf (vect_dump, "interleaving is not a power of 2");
/* The validated chain's group size is its stride in elements.  */
1938 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1944 /* Function vect_analyze_data_ref_accesses.
1946 Analyze the access pattern of all the data references in the loop.
1948 FORNOW: the only access pattern that is considered vectorizable is a
1949 simple step 1 (consecutive) access.
1951 FORNOW: handle only arrays and pointer accesses. */
1954 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
/* Apply vect_analyze_data_ref_access to each data reference; any failure
   makes the whole loop unvectorizable.  */
1957 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1958 struct data_reference *dr;
1960 if (vect_print_dump_info (REPORT_DETAILS))
1961 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1963 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1964 if (!vect_analyze_data_ref_access (dr))
1966 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1967 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1975 /* Function vect_analyze_data_refs.
1977 Find all the data references in the loop.
1979 The general structure of the analysis of data refs in the vectorizer is as
1981 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1982 find and analyze all data-refs in the loop and their dependences.
1983 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1984 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1985 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1990 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1992 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1994 VEC (data_reference_p, heap) *datarefs;
1995 struct data_reference *dr;
1998 if (vect_print_dump_info (REPORT_DETAILS))
1999 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
/* Populate LOOP_VINFO_DATAREFS and LOOP_VINFO_DDRS in one shot.  */
2001 compute_data_dependences_for_loop (loop, true,
2002 &LOOP_VINFO_DATAREFS (loop_vinfo),
2003 &LOOP_VINFO_DDRS (loop_vinfo));
2005 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2006 from stmt_vec_info struct to DR and vectype. */
2007 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2009 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2012 stmt_vec_info stmt_info;
2014 if (!dr || !DR_REF (dr))
2016 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2017 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2021 /* Update DR field in stmt_vec_info struct. */
2022 stmt = DR_STMT (dr);
2023 stmt_info = vinfo_for_stmt (stmt);
/* FORNOW restriction visible here: a stmt may carry at most one DR.  */
2025 if (STMT_VINFO_DATA_REF (stmt_info))
2027 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2030 "not vectorized: more than one data ref in stmt: ");
2031 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2035 STMT_VINFO_DATA_REF (stmt_info) = dr;
2037 /* Check that analysis of the data-ref succeeded. */
2038 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2041 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2043 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2044 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2048 if (!DR_MEMTAG (dr))
2050 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2052 fprintf (vect_dump, "not vectorized: no memory tag for ");
2053 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2058 /* Set vectype for STMT. */
2059 scalar_type = TREE_TYPE (DR_REF (dr));
2060 STMT_VINFO_VECTYPE (stmt_info) =
2061 get_vectype_for_scalar_type (scalar_type);
2062 if (!STMT_VINFO_VECTYPE (stmt_info))
2064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2067 "not vectorized: no vectype for stmt: ");
2068 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2069 fprintf (vect_dump, " scalar_type: ");
2070 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2080 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2082 /* Function vect_mark_relevant.
2084 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2087 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2088 enum vect_relevant relevant, bool live_p)
2090 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Remember the pre-existing flags so we can detect a no-op update and
   avoid re-queuing the stmt on the worklist.  */
2091 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2092 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2094 if (vect_print_dump_info (REPORT_DETAILS))
2095 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2097 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2101 /* This is the last stmt in a sequence that was detected as a
2102 pattern that can potentially be vectorized. Don't mark the stmt
2103 as relevant/live because it's not going to vectorized.
2104 Instead mark the pattern-stmt that replaces it. */
2105 if (vect_print_dump_info (REPORT_DETAILS))
2106 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2107 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2108 stmt_info = vinfo_for_stmt (pattern_stmt);
2109 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2110 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2111 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2112 stmt = pattern_stmt;
/* Flags are monotone: liveness is OR-ed in, and relevance only ever
   increases (enum values are ordered by strength).  */
2115 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2116 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2117 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2119 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2120 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2122 if (vect_print_dump_info (REPORT_DETAILS))
2123 fprintf (vect_dump, "already marked relevant/live.");
2127 VEC_safe_push (tree, heap, *worklist, stmt);
2131 /* Function vect_stmt_relevant_p.
2133 Return true if STMT in loop that is represented by LOOP_VINFO is
2134 "relevant for vectorization".
2136 A stmt is considered "relevant for vectorization" if:
2137 - it has uses outside the loop.
2138 - it has vdefs (it alters memory).
2139 - control stmts in the loop (except for the exit condition).
2141 CHECKME: what other side effects would the vectorizer allow? */
2144 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2145 enum vect_relevant *relevant, bool *live_p)
2147 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2148 ssa_op_iter op_iter;
2149 imm_use_iterator imm_iter;
2150 use_operand_p use_p;
2151 def_operand_p def_p;
2153 *relevant = vect_unused_in_loop;
2156 /* cond stmt other than loop exit cond. */
2157 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2158 *relevant = vect_used_in_loop;
2160 /* changing memory. */
2161 if (TREE_CODE (stmt) != PHI_NODE)
2162 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2164 if (vect_print_dump_info (REPORT_DETAILS))
2165 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2166 *relevant = vect_used_in_loop;
2169 /* uses outside the loop. */
2170 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2172 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2174 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2175 if (!flow_bb_inside_loop_p (loop, bb))
2177 if (vect_print_dump_info (REPORT_DETAILS))
2178 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2180 /* We expect all such uses to be in the loop exit phis
2181 (because of loop closed form) */
2182 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2183 gcc_assert (bb == single_exit (loop)->dest);
/* Relevant if either marked live (used after the loop) or marked as
   used within the loop above.  */
2190 return (*live_p || *relevant);
2195 Function process_use.
2198 - a USE in STMT in a loop represented by LOOP_VINFO
2199 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
2200 that defined USE. This is dont by calling mark_relevant and passing it
2201 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
2204 Generally, LIVE_P and RELEVANT are used to define the liveness and
2205 relevance info of the DEF_STMT of this USE:
2206 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2207 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2209 - case 1: If USE is used only for address computations (e.g. array indexing),
2210 which does not need to be directly vectorized, then the liveness/relevance
2211 of the respective DEF_STMT is left unchanged.
2212 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
2213 skip DEF_STMT cause it had already been processed.
2215 Return true if everyting is as expected. Return false otherwise. */
/* Propagate the LIVE_P/RELEVANT properties of STMT to the stmt that defines
   USE, adding that def stmt to *WORKLIST via vect_mark_relevant (see the
   block comment preceding this function).
   NOTE(review): this excerpt appears to have braces and early-return lines
   elided; the comments below annotate only the code that is visible.  */
2218 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
2219 enum vect_relevant relevant, VEC(tree,heap) **worklist)
2221 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2222 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2223 stmt_vec_info dstmt_vinfo;
2226 enum vect_def_type dt;
2228 /* case 1: we are only interested in uses that need to be vectorized. Uses
2229 that are used for address computation are not considered relevant. */
2230 if (!exist_non_indexing_operands_for_use_p (use, stmt))
/* An unanalyzable use makes the whole loop unvectorizable; report it.  */
2233 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2235 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2236 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
/* Defs that are empty or defined outside the loop need no marking.  */
2240 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2243 def_bb = bb_for_stmt (def_stmt);
2244 if (!flow_bb_inside_loop_p (loop, def_bb))
2247 /* case 2: A reduction phi defining a reduction stmt (DEF_STMT). DEF_STMT
2248 must have already been processed, so we just check that everything is as
2249 expected, and we are done. */
2250 dstmt_vinfo = vinfo_for_stmt (def_stmt);
2251 if (TREE_CODE (stmt) == PHI_NODE
2252 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2253 && TREE_CODE (def_stmt) != PHI_NODE
2254 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def)
/* If DEF_STMT was replaced by a pattern stmt, check the pattern stmt's
   vinfo instead of the original's.  */
2256 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
2257 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo))
2258 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
2259 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
2260 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
/* Default: mark DEF_STMT with the caller-supplied liveness/relevance and
   queue it on the worklist.  */
2264 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
2269 /* Function vect_mark_stmts_to_be_vectorized.
2271 Not all stmts in the loop need to be vectorized. For example:
2280 Stmt 1 and 3 do not need to be vectorized, because loop control and
2281 addressing of vectorized data-refs are handled differently.
2283 This pass detects such stmts. */
/* Worklist algorithm: (1) seed the worklist with every phi/stmt that
   vect_stmt_relevant_p marks relevant or live, then (2) pop stmts and
   propagate their liveness/relevance backwards to the defs of their uses
   via process_use.
   NOTE(review): braces and return statements appear elided in this
   excerpt; comments annotate only the visible logic.  */
2286 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2288 VEC(tree,heap) *worklist;
2289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2290 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2291 unsigned int nbbs = loop->num_nodes;
2292 block_stmt_iterator si;
2296 stmt_vec_info stmt_vinfo;
2300 enum vect_relevant relevant;
2302 if (vect_print_dump_info (REPORT_DETAILS))
2303 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2305 worklist = VEC_alloc (tree, heap, 64);
2307 /* 1. Init worklist. */
2308 for (i = 0; i < nbbs; i++)
/* Seed with relevant/live phis of each loop bb...  */
2311 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2313 if (vect_print_dump_info (REPORT_DETAILS))
2315 fprintf (vect_dump, "init: phi relevant? ");
2316 print_generic_expr (vect_dump, phi, TDF_SLIM);
2319 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2320 vect_mark_relevant (&worklist, phi, relevant, live_p);
/* ...and with relevant/live non-phi stmts.  */
2322 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2324 stmt = bsi_stmt (si);
2325 if (vect_print_dump_info (REPORT_DETAILS))
2327 fprintf (vect_dump, "init: stmt relevant? ");
2328 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2331 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2332 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2336 /* 2. Process_worklist */
2337 while (VEC_length (tree, worklist) > 0)
2339 use_operand_p use_p;
2342 stmt = VEC_pop (tree, worklist);
2343 if (vect_print_dump_info (REPORT_DETAILS))
2345 fprintf (vect_dump, "worklist: examine stmt: ");
2346 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2349 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
2350 (DEF_STMT) as relevant/irrelevant and live/dead according to the
2351 liveness and relevance properties of STMT. */
2352 ann = stmt_ann (stmt);
2353 stmt_vinfo = vinfo_for_stmt (stmt);
2354 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2355 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2357 /* Generally, the liveness and relevance properties of STMT are
2358 propagated as is to the DEF_STMTs of its USEs:
2359 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
2360 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
2362 One exception is when STMT has been identified as defining a reduction
2363 variable; in this case we set the liveness/relevance as follows:
2365 relevant = vect_used_by_reduction
2366 This is because we distinguish between two kinds of relevant stmts -
2367 those that are used by a reduction computation, and those that are
2368 (also) used by a regular computation. This allows us later on to
2369 identify stmts that are used solely by a reduction, and therefore the
2370 order of the results that they produce does not have to be kept.
2372 Reduction phis are expected to be used by a reduction stmt; Other
2373 reduction stmts are expected to be unused in the loop. These are the
2374 expected values of "relevant" for reduction phis/stmts in the loop:
2377 vect_unused_in_loop ok
2378 vect_used_by_reduction ok
2379 vect_used_in_loop */
2381 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
/* Validate "relevant" against the table above; any other combination
   is an unsupported use of a reduction and aborts the analysis.  */
2385 case vect_unused_in_loop:
2386 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2388 case vect_used_by_reduction:
2389 if (TREE_CODE (stmt) == PHI_NODE)
2391 case vect_used_in_loop:
2393 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "unsupported use of reduction.");
2395 VEC_free (tree, heap, worklist);
/* For reduction stmts, downgrade the propagated relevance.  */
2398 relevant = vect_used_by_reduction;
/* Propagate to the def of every SSA use; a failed process_use aborts
   the whole analysis.  */
2402 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2404 tree op = USE_FROM_PTR (use_p);
2405 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
2407 VEC_free (tree, heap, worklist);
2411 } /* while worklist */
2413 VEC_free (tree, heap, worklist);
2418 /* Function vect_can_advance_ivs_p
2420 In case the number of iterations that LOOP iterates is unknown at compile
2421 time, an epilog loop will be generated, and the loop induction variables
2422 (IVs) will be "advanced" to the value they are supposed to take just before
2423 the epilog loop. Here we check that the access function of the loop IVs
2424 and the expression that represents the loop bound are simple enough.
2425 These restrictions will be relaxed in the future. */
/* Check each loop-header phi (skipping virtual and reduction phis) and
   reject IVs whose scalar evolution cannot be analyzed or whose evolution
   part is itself a chrec (degree >= 2) — see the block comment above.
   NOTE(review): braces and return statements appear elided in this
   excerpt; comments annotate only the visible logic.  */
2428 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2431 basic_block bb = loop->header;
2434 /* Analyze phi functions of the loop header. */
2436 if (vect_print_dump_info (REPORT_DETAILS))
2437 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2439 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2441 tree access_fn = NULL;
2442 tree evolution_part;
2444 if (vect_print_dump_info (REPORT_DETAILS))
2446 fprintf (vect_dump, "Analyze phi: ");
2447 print_generic_expr (vect_dump, phi, TDF_SLIM);
2450 /* Skip virtual phi's. The data dependences that are associated with
2451 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2453 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2455 if (vect_print_dump_info (REPORT_DETAILS))
2456 fprintf (vect_dump, "virtual phi. skip.");
2460 /* Skip reduction phis. */
2462 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2464 if (vect_print_dump_info (REPORT_DETAILS))
2465 fprintf (vect_dump, "reduc phi. skip.");
2469 /* Analyze the evolution function. */
2471 access_fn = instantiate_parameters
2472 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
/* No access function => cannot advance this IV.  */
2476 if (vect_print_dump_info (REPORT_DETAILS))
2477 fprintf (vect_dump, "No Access function.");
2481 if (vect_print_dump_info (REPORT_DETAILS))
2483 fprintf (vect_dump, "Access function of PHI: ");
2484 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2487 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2489 if (evolution_part == NULL_TREE)
2491 if (vect_print_dump_info (REPORT_DETAILS))
2492 fprintf (vect_dump, "No evolution.");
2496 /* FORNOW: We do not transform initial conditions of IVs
2497 which evolution functions are a polynomial of degree >= 2. */
2499 if (tree_is_chrec (evolution_part))
2507 /* Function vect_get_loop_niters.
2509 Determine how many iterations the loop is executed.
2510 If an expression that represents the number of iterations
2511 can be constructed, place it in NUMBER_OF_ITERATIONS.
2512 Return the loop exit condition. */
/* Compute the iteration count via number_of_exit_cond_executions; store it
   in *NUMBER_OF_ITERATIONS only when it is known (non-NULL and not
   chrec_dont_know), and return the loop exit condition.
   NOTE(review): braces appear elided in this excerpt.  */
2515 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2519 if (vect_print_dump_info (REPORT_DETAILS))
2520 fprintf (vect_dump, "=== get_loop_niters ===");
2522 niters = number_of_exit_cond_executions (loop);
2524 if (niters != NULL_TREE
2525 && niters != chrec_dont_know)
2527 *number_of_iterations = niters;
2529 if (vect_print_dump_info (REPORT_DETAILS))
2531 fprintf (vect_dump, "==> get_loop_niters:" );
2532 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2536 return get_loop_exit_condition (loop);
2540 /* Function vect_analyze_loop_form.
2542 Verify the following restrictions (some may be relaxed in the future):
2543 - it's an inner-most loop
2544 - number of BBs = 2 (which are the loop header and the latch)
2545 - the loop has a pre-header
2546 - the loop has a single entry and exit
2547 - the loop exit condition is simple enough, and the number of iterations
2548 can be analyzed (a countable loop). */
/* Verify the loop-form restrictions listed in the block comment above and,
   on success, create a loop_vec_info recording the iteration count and the
   exit condition.
   NOTE(review): braces and the "return NULL" failure paths appear elided
   in this excerpt; comments annotate only the visible checks.  */
2550 static loop_vec_info
2551 vect_analyze_loop_form (struct loop *loop)
2553 loop_vec_info loop_vinfo;
2555 tree number_of_iterations = NULL;
2557 if (vect_print_dump_info (REPORT_DETAILS))
2558 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
/* Reject non-innermost loops.  */
2562 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2563 fprintf (vect_dump, "not vectorized: nested loop.");
/* Require a single exit, exactly two BBs (header + latch), and exactly
   two incoming header edges (preheader + latch back edge).  */
2567 if (!single_exit (loop)
2568 || loop->num_nodes != 2
2569 || EDGE_COUNT (loop->header->preds) != 2)
2571 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2573 if (!single_exit (loop))
2574 fprintf (vect_dump, "not vectorized: multiple exits.");
2575 else if (loop->num_nodes != 2)
2576 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2577 else if (EDGE_COUNT (loop->header->preds) != 2)
2578 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2584 /* We assume that the loop exit condition is at the end of the loop. i.e,
2585 that the loop is represented as a do-while (with a proper if-guard
2586 before the loop if needed), where the loop header contains all the
2587 executable statements, and the latch is empty. */
2588 if (!empty_block_p (loop->latch)
2589 || phi_nodes (loop->latch))
2591 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2592 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2596 /* Make sure there exists a single-predecessor exit bb: */
2597 if (!single_pred_p (single_exit (loop)->dest))
2599 edge e = single_exit (loop);
2600 if (!(e->flags & EDGE_ABNORMAL))
/* A normal exit edge can be split to create the single-pred exit bb.  */
2602 split_loop_exit_edge (e);
2603 if (vect_print_dump_info (REPORT_DETAILS))
2604 fprintf (vect_dump, "split exit edge.");
2608 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2609 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
/* An empty header means there is nothing to vectorize.  */
2614 if (empty_block_p (loop->header))
2616 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2617 fprintf (vect_dump, "not vectorized: empty loop.");
/* Require an analyzable exit condition and a computable, finite
   iteration count.  */
2621 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2624 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2625 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2629 if (!number_of_iterations)
2631 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2633 "not vectorized: number of iterations cannot be computed.");
2637 if (chrec_contains_undetermined (number_of_iterations))
2639 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2640 fprintf (vect_dump, "Infinite number of iterations.");
/* All form checks passed: record the results in a fresh loop_vec_info.  */
2644 loop_vinfo = new_loop_vec_info (loop);
2645 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2647 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2649 if (vect_print_dump_info (REPORT_DETAILS))
2651 fprintf (vect_dump, "Symbolic number of iterations is ");
2652 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
/* A known iteration count of zero is not worth vectorizing.  */
2656 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2658 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2659 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2663 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2669 /* Function vect_analyze_loop.
2671 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2672 for it. The different analyses will record information in the
2673 loop_vec_info struct. */
2675 vect_analyze_loop (struct loop *loop)
2678 loop_vec_info loop_vinfo;
2680 if (vect_print_dump_info (REPORT_DETAILS))
2681 fprintf (vect_dump, "===== analyze_loop_nest =====");
2683 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2685 loop_vinfo = vect_analyze_loop_form (loop);
2688 if (vect_print_dump_info (REPORT_DETAILS))
2689 fprintf (vect_dump, "bad loop form.");
2693 /* Find all data references in the loop (which correspond to vdefs/vuses)
2694 and analyze their evolution in the loop.
2696 FORNOW: Handle only simple, array references, which
2697 alignment can be forced, and aligned pointer-references. */
2699 ok = vect_analyze_data_refs (loop_vinfo);
2702 if (vect_print_dump_info (REPORT_DETAILS))
2703 fprintf (vect_dump, "bad data references.");
2704 destroy_loop_vec_info (loop_vinfo);
2708 /* Classify all cross-iteration scalar data-flow cycles.
2709 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2711 vect_analyze_scalar_cycles (loop_vinfo);
2713 vect_pattern_recog (loop_vinfo);
2715 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2717 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2720 if (vect_print_dump_info (REPORT_DETAILS))
2721 fprintf (vect_dump, "unexpected pattern.");
2722 destroy_loop_vec_info (loop_vinfo);
2726 /* Analyze the alignment of the data-refs in the loop.
2727 Fail if a data reference is found that cannot be vectorized. */
2729 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2732 if (vect_print_dump_info (REPORT_DETAILS))
2733 fprintf (vect_dump, "bad data alignment.");
2734 destroy_loop_vec_info (loop_vinfo);
2738 ok = vect_determine_vectorization_factor (loop_vinfo);
2741 if (vect_print_dump_info (REPORT_DETAILS))
2742 fprintf (vect_dump, "can't determine vectorization factor.");
2743 destroy_loop_vec_info (loop_vinfo);
2747 /* Analyze data dependences between the data-refs in the loop.
2748 FORNOW: fail at the first data dependence that we encounter. */
2750 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2753 if (vect_print_dump_info (REPORT_DETAILS))
2754 fprintf (vect_dump, "bad data dependence.");
2755 destroy_loop_vec_info (loop_vinfo);
2759 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2760 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2762 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2765 if (vect_print_dump_info (REPORT_DETAILS))
2766 fprintf (vect_dump, "bad data access.");
2767 destroy_loop_vec_info (loop_vinfo);
2771 /* This pass will decide on using loop versioning and/or loop peeling in
2772 order to enhance the alignment of data references in the loop. */
2774 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2777 if (vect_print_dump_info (REPORT_DETAILS))
2778 fprintf (vect_dump, "bad data alignment.");
2779 destroy_loop_vec_info (loop_vinfo);
2783 /* Scan all the operations in the loop and make sure they are
2786 ok = vect_analyze_operations (loop_vinfo);
2789 if (vect_print_dump_info (REPORT_DETAILS))
2790 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2791 destroy_loop_vec_info (loop_vinfo);
2795 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;