1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions.  */
/* Forward declarations for the per-phase analysis routines defined in this
   file.  Each takes the loop_vec_info bookkeeping structure for the loop
   under analysis and returns true on success (except the two that return
   a value/void).  */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses.  */
/* Helpers used by the main analysis phases above; these operate on single
   statements or single data references rather than whole loops.  */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info, bool);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
/* NOTE(review): this excerpt has gaps in the original line numbering --
   braces, returns and some declarations (e.g. `i', `vectype', `nunits',
   `scalar_type') are not visible here.  Comments below describe only the
   visible statements.  */
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
103 if (vect_print_dump_info (REPORT_DETAILS))
104 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
/* Walk every basic block of the loop and every statement in each block.  */
106 for (i = 0; i < nbbs; i++)
108 basic_block bb = bbs[i];
110 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
112 tree stmt = bsi_stmt (si);
114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining statement: ");
120 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Only modify-statements are assigned a vectype below.  */
123 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
126 gcc_assert (stmt_info);
128 /* skip stmts which do not need to be vectorized. */
129 if (!STMT_VINFO_RELEVANT_P (stmt_info)
130 && !STMT_VINFO_LIVE_P (stmt_info))
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "skip.");
/* A statement whose scalar type is already a vector mode cannot be
   vectorized -- report the loop as unvectorizable.  */
137 if (!GIMPLE_STMT_P (stmt)
138 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
143 print_generic_expr (vect_dump, stmt, TDF_SLIM);
148 if (STMT_VINFO_VECTYPE (stmt_info))
150 /* The only case when a vectype had been already set is for stmts
151 that contain a dataref, or for "pattern-stmts" (stmts generated
152 by the vectorizer to represent/replace a certain idiom). */
153 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
154 || is_pattern_stmt_p (stmt_info));
155 vectype = STMT_VINFO_VECTYPE (stmt_info);
159 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
160 && !is_pattern_stmt_p (stmt_info));
162 /* We set the vectype according to the type of the result (lhs).
163 For stmts whose result-type is different than the type of the
164 arguments (e.g. demotion, promotion), vectype will be reset
165 appropriately (later). Note that we have to visit the smallest
166 datatype in this function, because that determines the VF.
167 If the samallest datatype in the loop is present only as the
168 rhs of a promotion operation - we'd miss it here.
169 However, in such a case, that a variable of this datatype
170 does not appear in the lhs anywhere in the loop, it shouldn't
171 affect the vectorization factor. */
172 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
174 if (vect_print_dump_info (REPORT_DETAILS))
176 fprintf (vect_dump, "get vectype for scalar type: ");
177 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
180 vectype = get_vectype_for_scalar_type (scalar_type);
183 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
186 "not vectorized: unsupported data-type ");
187 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
191 STMT_VINFO_VECTYPE (stmt_info) = vectype;
194 if (vect_print_dump_info (REPORT_DETAILS))
196 fprintf (vect_dump, "vectype: ");
197 print_generic_expr (vect_dump, vectype, TDF_SLIM);
/* The loop VF is the maximum number of vector subparts over all the
   statements examined: keep the largest nunits seen so far.  */
200 nunits = TYPE_VECTOR_SUBPARTS (vectype);
201 if (vect_print_dump_info (REPORT_DETAILS))
202 fprintf (vect_dump, "nunits = %d", nunits);
204 if (!vectorization_factor
205 || (nunits > vectorization_factor))
206 vectorization_factor = nunits;
211 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* A factor of 0 or 1 means no vectorizable data type was found.  */
213 if (vectorization_factor <= 1)
215 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
216 fprintf (vect_dump, "not vectorized: unsupported data-type");
/* Record the computed VF in the loop bookkeeping structure.  */
219 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
225 /* Function vect_analyze_operations.
227 Scan the loop stmts and make sure they are all vectorizable. */
230 vect_analyze_operations (loop_vec_info loop_vinfo)
/* NOTE(review): the excerpt is missing lines here (gaps in the original
   numbering): braces, `return' statements and some declarations
   (e.g. `i', `phi', `ok') are not visible.  */
232 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
233 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
234 int nbbs = loop->num_nodes;
235 block_stmt_iterator si;
236 unsigned int vectorization_factor = 0;
240 stmt_vec_info stmt_info;
241 bool need_to_vectorize = false;
243 if (vect_print_dump_info (REPORT_DETAILS))
244 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* The VF must have been computed by vect_determine_vectorization_factor
   before this function runs.  */
246 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
247 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
249 for (i = 0; i < nbbs; i++)
251 basic_block bb = bbs[i];
/* First examine the PHI nodes of this block.  */
253 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
255 stmt_info = vinfo_for_stmt (phi);
256 if (vect_print_dump_info (REPORT_DETAILS))
258 fprintf (vect_dump, "examining phi: ");
259 print_generic_expr (vect_dump, phi, TDF_SLIM);
262 gcc_assert (stmt_info);
264 if (STMT_VINFO_LIVE_P (stmt_info))
266 /* FORNOW: not yet supported. */
267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
268 fprintf (vect_dump, "not vectorized: value used after loop.");
272 if (STMT_VINFO_RELEVANT_P (stmt_info))
274 /* Most likely a reduction-like computation that is used
276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
277 fprintf (vect_dump, "not vectorized: unsupported pattern.");
/* Then examine the ordinary statements of this block.  */
282 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
284 tree stmt = bsi_stmt (si);
285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
287 if (vect_print_dump_info (REPORT_DETAILS))
289 fprintf (vect_dump, "==> examining statement: ");
290 print_generic_expr (vect_dump, stmt, TDF_SLIM);
293 gcc_assert (stmt_info);
295 /* skip stmts which do not need to be vectorized.
296 this is expected to include:
297 - the COND_EXPR which is the loop exit condition
298 - any LABEL_EXPRs in the loop
299 - computations that are used only for array indexing or loop
302 if (!STMT_VINFO_RELEVANT_P (stmt_info)
303 && !STMT_VINFO_LIVE_P (stmt_info))
305 if (vect_print_dump_info (REPORT_DETAILS))
306 fprintf (vect_dump, "irrelevant.");
310 if (STMT_VINFO_RELEVANT_P (stmt_info))
312 gcc_assert (GIMPLE_STMT_P (stmt)
313 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
314 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
/* A relevant stmt is supported if any one of the vectorizable_*
   predicates accepts it.  */
316 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
317 || vectorizable_type_demotion (stmt, NULL, NULL)
318 || vectorizable_operation (stmt, NULL, NULL)
319 || vectorizable_assignment (stmt, NULL, NULL)
320 || vectorizable_load (stmt, NULL, NULL)
321 || vectorizable_call (stmt, NULL, NULL)
322 || vectorizable_store (stmt, NULL, NULL)
323 || vectorizable_condition (stmt, NULL, NULL));
327 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
330 "not vectorized: relevant stmt not supported: ");
331 print_generic_expr (vect_dump, stmt, TDF_SLIM);
335 need_to_vectorize = true;
338 if (STMT_VINFO_LIVE_P (stmt_info))
340 ok = vectorizable_reduction (stmt, NULL, NULL);
343 need_to_vectorize = true;
345 ok = vectorizable_live_operation (stmt, NULL, NULL);
349 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
352 "not vectorized: live stmt not supported: ");
353 print_generic_expr (vect_dump, stmt, TDF_SLIM);
361 /* TODO: Analyze cost. Decide if worth while to vectorize. */
363 /* All operations in the loop are either irrelevant (deal with loop
364 control, or dead), or only used outside the loop and can be moved
365 out of the loop (e.g. invariants, inductions). The loop can be
366 optimized away by scalar optimizations. We're better off not
367 touching this loop. */
368 if (!need_to_vectorize)
370 if (vect_print_dump_info (REPORT_DETAILS))
372 "All the computation can be taken out of the loop.")
373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
375 "not vectorized: redundant loop. no profit to vectorize.");
379 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
380 && vect_print_dump_info (REPORT_DETAILS))
382 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
383 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Reject loops whose known iteration count is too small to be worth
   vectorizing (less than VF, or below the min-vect-loop-bound param
   scaled by VF).  */
385 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
386 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
387 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
388 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
389 * vectorization_factor))))
391 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
392 fprintf (vect_dump, "not vectorized: iteration count too small.");
/* An epilog loop is needed when the iteration count is unknown, is not a
   multiple of VF, or when peeling for alignment was requested.  */
396 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
397 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
398 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
400 if (vect_print_dump_info (REPORT_DETAILS))
401 fprintf (vect_dump, "epilog loop required.");
402 if (!vect_can_advance_ivs_p (loop_vinfo))
404 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
406 "not vectorized: can't create epilog loop 1.");
409 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
411 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
413 "not vectorized: can't create epilog loop 2.");
422 /* Function exist_non_indexing_operands_for_use_p
424 USE is one of the uses attached to STMT. Check if USE is
425 used in STMT for anything other than indexing an array. */
428 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
/* NOTE(review): braces, returns and the comparison of `operand' against
   `use' are missing from this excerpt (gaps in original numbering).  */
431 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
433 /* USE corresponds to some operand in STMT. If there is no data
434 reference in STMT, then any operand that corresponds to USE
435 is not indexing an array. */
436 if (!STMT_VINFO_DATA_REF (stmt_info))
439 /* STMT has a data_ref. FORNOW this means that its of one of
443 (This should have been verified in analyze_data_refs).
445 'var' in the second case corresponds to a def, not a use,
446 so USE cannot correspond to any operands that are not used
449 Therefore, all we need to check is if STMT falls into the
450 first case, and whether var corresponds to USE. */
/* Inspect the lhs/rhs of the modify-stmt to classify the use.  */
452 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
455 operand = GIMPLE_STMT_OPERAND (stmt, 1);
457 if (TREE_CODE (operand) != SSA_NAME)
467 /* Function vect_analyze_scalar_cycles.
469 Examine the cross iteration def-use cycles of scalar variables, by
470 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
471 following: invariant, induction, reduction, unknown.
473 Some forms of scalar cycles are not yet supported.
475 Example1: reduction: (unsupported yet)
481 Example2: induction: (unsupported yet)
487 Note: the following loop *is* vectorizable:
493 even though it has a def-use cycle caused by the induction variable i:
495 loop: i_2 = PHI (i_0, i_1)
500 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
501 it does not need to be vectorized because it is only used for array
502 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
503 loop2 on the other hand is relevant (it is being written to memory). */
507 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
/* NOTE(review): braces, `continue's and some declarations (`phi', `dummy',
   `reduc_stmt') are missing from this excerpt.  */
510 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
511 basic_block bb = loop->header;
514 if (vect_print_dump_info (REPORT_DETAILS))
515 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
/* Classify each PHI node in the loop header.  */
517 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
519 tree access_fn = NULL;
520 tree def = PHI_RESULT (phi);
521 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
524 if (vect_print_dump_info (REPORT_DETAILS))
526 fprintf (vect_dump, "Analyze phi: ");
527 print_generic_expr (vect_dump, phi, TDF_SLIM);
530 /* Skip virtual phi's. The data dependences that are associated with
531 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
533 if (!is_gimple_reg (SSA_NAME_VAR (def)))
535 if (vect_print_dump_info (REPORT_DETAILS))
536 fprintf (vect_dump, "virtual phi. skip.");
/* Start from "unknown" and refine below when the scalar evolution
   identifies an induction or a reduction.  */
540 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
542 /* Analyze the evolution function. */
544 access_fn = analyze_scalar_evolution (loop, def);
549 if (vect_print_dump_info (REPORT_DETAILS))
551 fprintf (vect_dump, "Access function of PHI: ");
552 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
555 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
557 if (vect_print_dump_info (REPORT_DETAILS))
558 fprintf (vect_dump, "Detected induction.");
559 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
563 /* TODO: handle invariant phis */
/* Not an induction: check for a simple reduction cycle.  */
565 reduc_stmt = vect_is_simple_reduction (loop, phi);
568 if (vect_print_dump_info (REPORT_DETAILS))
569 fprintf (vect_dump, "Detected reduction.");
570 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
571 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
575 if (vect_print_dump_info (REPORT_DETAILS))
576 fprintf (vect_dump, "Unknown def-use cycle pattern.");
584 /* Function vect_insert_into_interleaving_chain.
586 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
589 vect_insert_into_interleaving_chain (struct data_reference *dra,
590 struct data_reference *drb)
/* NOTE(review): the surrounding while-loop header, braces and `return's
   are missing from this excerpt; the visible statements perform a sorted
   linked-list insertion keyed on DR_INIT.  */
592 tree prev, next, next_init;
593 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
594 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
/* Walk DRB's chain starting from its first element.  */
596 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
597 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
600 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
601 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
/* Found the first element with a larger init: link DRA before it.  */
604 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
605 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
609 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
612 /* We got to the end of the list. Insert here. */
613 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
614 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
618 /* Function vect_update_interleaving_chain.
620 For two data-refs DRA and DRB that are a part of a chain interleaved data
621 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
623 There are four possible cases:
624 1. New stmts - both DRA and DRB are not a part of any chain:
627 2. DRB is a part of a chain and DRA is not:
628 no need to update FIRST_DR
629 no need to insert DRB
630 insert DRA according to init
631 3. DRA is a part of a chain and DRB is not:
632 if (init of FIRST_DR > init of DRB)
634 NEXT(FIRST_DR) = previous FIRST_DR
636 insert DRB according to its init
637 4. both DRA and DRB are in some interleaving chains:
638 choose the chain with the smallest init of FIRST_DR
639 insert the nodes of the second chain into the first one. */
642 vect_update_interleaving_chain (struct data_reference *drb,
643 struct data_reference *dra)
/* NOTE(review): braces, `return's, loop headers and a `tmp' declaration are
   missing from this excerpt (gaps in original numbering).  */
645 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
646 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
647 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
648 tree node, prev, next, node_init, first_stmt;
650 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
651 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
/* DRB (the smaller init) becomes the chain head, DRA its successor.  */
653 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
654 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
655 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
659 /* 2. DRB is a part of a chain and DRA is not. */
660 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
662 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
663 /* Insert DRA into the chain of DRB. */
664 vect_insert_into_interleaving_chain (dra, drb);
668 /* 3. DRA is a part of a chain and DRB is not. */
669 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
671 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
672 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
676 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
678 /* DRB's init is smaller than the init of the stmt previously marked
679 as the first stmt of the interleaving chain of DRA. Therefore, we
680 update FIRST_STMT and put DRB in the head of the list. */
681 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
682 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
684 /* Update all the stmts in the list to point to the new FIRST_STMT. */
685 tmp = old_first_stmt;
688 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
689 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
694 /* Insert DRB in the list of DRA. */
695 vect_insert_into_interleaving_chain (drb, dra);
696 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
701 /* 4. both DRA and DRB are in some interleaving chains. */
702 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
703 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
/* If both refs already share the same chain there is nothing to merge.  */
704 if (first_a == first_b)
706 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
707 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
/* Merge into whichever chain starts at the smaller init.  */
709 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
711 /* Insert the nodes of DRA chain into the DRB chain.
712 After inserting a node, continue from this node of the DRB chain (don't
713 start from the beginning. */
714 node = DR_GROUP_FIRST_DR (stmtinfo_a);
715 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
716 first_stmt = first_b;
720 /* Insert the nodes of DRB chain into the DRA chain.
721 After inserting a node, continue from this node of the DRA chain (don't
722 start from the beginning. */
723 node = DR_GROUP_FIRST_DR (stmtinfo_b);
724 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
725 first_stmt = first_a;
/* Sorted insertion of each `node' into the chosen chain, keyed on
   DR_INIT, mirroring vect_insert_into_interleaving_chain.  */
730 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
731 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
734 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
735 if (tree_int_cst_compare (next_init, node_init) > 0)
738 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
739 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
744 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
748 /* We got to the end of the list. Insert here. */
749 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
750 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
/* Re-point the inserted node at the surviving chain head and advance.  */
753 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
754 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
759 /* Function vect_equal_offsets.
761 Check if OFFSET1 and OFFSET2 are identical expressions. */
764 vect_equal_offsets (tree offset1, tree offset2)
/* NOTE(review): the `return' statements and local declarations (res0/res1)
   are missing from this excerpt.  The visible logic is a recursive
   structural comparison of two offset expressions.  */
768 STRIP_NOPS (offset1);
769 STRIP_NOPS (offset2);
/* Pointer-identical trees are trivially equal.  */
771 if (offset1 == offset2)
/* Otherwise both must be binary expressions of the same code.  */
774 if (TREE_CODE (offset1) != TREE_CODE (offset2)
775 || !BINARY_CLASS_P (offset1)
776 || !BINARY_CLASS_P (offset2))
/* Recurse on both operands.  */
779 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
780 TREE_OPERAND (offset2, 0));
781 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
782 TREE_OPERAND (offset2, 1));
784 return (res0 && res1);
788 /* Function vect_check_interleaving.
790 Check if DRA and DRB are a part of interleaving. In case they are, insert
791 DRA and DRB in an interleaving chain. */
794 vect_check_interleaving (struct data_reference *dra,
795 struct data_reference *drb)
/* NOTE(review): braces and early `return's are missing from this excerpt
   (gaps in original numbering).  */
797 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
799 /* Check that the data-refs have same first location (except init) and they
800 are both either store or load (not load and store). */
801 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
802 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
803 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
804 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
805 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
806 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
807 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
808 || DR_IS_READ (dra) != DR_IS_READ (drb))
/* Check the interleaving preconditions:  */
812 1. data-refs are of the same type
813 2. their steps are equal
814 3. the step is greater than the difference between data-refs' inits */
815 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
816 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
818 if (type_size_a != type_size_b
819 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
822 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
823 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
824 step = TREE_INT_CST_LOW (DR_STEP (dra));
828 /* If init_a == init_b + the size of the type * k, we have an interleaving,
829 and DRB is accessed before DRA. */
830 diff_mod_size = (init_a - init_b) % type_size_a;
/* The init difference must fit within one step for the accesses to
   interleave.  */
832 if ((init_a - init_b) > step)
835 if (diff_mod_size == 0)
837 vect_update_interleaving_chain (drb, dra);
838 if (vect_print_dump_info (REPORT_DR_DETAILS))
840 fprintf (vect_dump, "Detected interleaving ");
841 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
842 fprintf (vect_dump, " and ");
843 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
850 /* If init_b == init_a + the size of the type * k, we have an
851 interleaving, and DRA is accessed before DRB. */
852 diff_mod_size = (init_b - init_a) % type_size_a;
854 if ((init_b - init_a) > step)
857 if (diff_mod_size == 0)
859 vect_update_interleaving_chain (dra, drb);
860 if (vect_print_dump_info (REPORT_DR_DETAILS))
862 fprintf (vect_dump, "Detected interleaving ");
863 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
864 fprintf (vect_dump, " and ");
865 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
873 /* Function vect_analyze_data_ref_dependence.
875 Return TRUE if there (might) exist a dependence between a memory-reference
876 DRA and a memory-reference DRB. */
879 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
880 loop_vec_info loop_vinfo,
881 bool check_interleaving)
/* NOTE(review): braces, `return true/false' statements and the declaration
   of `i' are missing from this excerpt (gaps in original numbering).  */
884 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
885 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
886 struct data_reference *dra = DDR_A (ddr);
887 struct data_reference *drb = DDR_B (ddr);
888 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
889 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
890 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
891 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
892 lambda_vector dist_v;
893 unsigned int loop_depth;
895 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
897 /* Independent data accesses. */
/* Independent refs may still form an interleaving group -- check when
   the caller allows it.  */
898 if (check_interleaving)
899 vect_check_interleaving (dra, drb);
/* Read-after-read or a self-dependence is harmless.  */
903 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
906 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
908 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
911 "not vectorized: can't determine dependence between ");
912 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
913 fprintf (vect_dump, " and ");
914 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
919 if (DDR_NUM_DIST_VECTS (ddr) == 0)
921 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
923 fprintf (vect_dump, "not vectorized: bad dist vector for ");
924 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
925 fprintf (vect_dump, " and ");
926 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Examine the dependence distance in this loop for every distance
   vector of the relation.  */
931 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
932 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
934 int dist = dist_v[loop_depth];
936 if (vect_print_dump_info (REPORT_DR_DETAILS))
937 fprintf (vect_dump, "dependence distance = %d.", dist);
939 /* Same loop iteration. */
940 if (dist % vectorization_factor == 0 && dra_size == drb_size)
942 /* Two references with distance zero have the same alignment. */
943 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
944 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
945 if (vect_print_dump_info (REPORT_ALIGNMENT))
946 fprintf (vect_dump, "accesses have the same alignment.");
947 if (vect_print_dump_info (REPORT_DR_DETAILS))
949 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
950 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
951 fprintf (vect_dump, " and ");
952 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
957 if (abs (dist) >= vectorization_factor)
959 /* Dependence distance does not create dependence, as far as vectorization
960 is concerned, in this case. */
961 if (vect_print_dump_info (REPORT_DR_DETAILS))
962 fprintf (vect_dump, "dependence distance >= VF.");
/* Distance smaller than VF and not a multiple of VF: a real
   vectorization-blocking dependence.  */
966 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
969 "not vectorized: possible dependence between data-refs ");
970 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
971 fprintf (vect_dump, " and ");
972 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
982 /* Function vect_check_dependences.
984 Return TRUE if there is a store-store or load-store dependence between
985 data-refs in DDR, otherwise return FALSE. */
988 vect_check_dependences (struct data_dependence_relation *ddr)
/* NOTE(review): the `return' statements and braces are missing from this
   excerpt (gaps in original numbering).  */
990 struct data_reference *dra = DDR_A (ddr);
991 struct data_reference *drb = DDR_B (ddr);
993 if (DDR_ARE_DEPENDENT (ddr) == chrec_known || dra == drb)
994 /* Independent or same data accesses. */
/* Both reads: a read-after-read pair is not a blocking dependence.  */
997 if (DR_IS_READ (dra) == DR_IS_READ (drb) && DR_IS_READ (dra))
1001 if (vect_print_dump_info (REPORT_DR_DETAILS))
1003 fprintf (vect_dump, "possible store or store/load dependence between ");
1004 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1005 fprintf (vect_dump, " and ");
1006 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1012 /* Function vect_analyze_data_ref_dependences.
1014 Examine all the data references in the loop, and make sure there do not
1015 exist any data dependences between them. */
1018 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
/* NOTE(review): braces, `break'/`return' statements and the declaration of
   `i' are missing from this excerpt.  */
1021 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1022 struct data_dependence_relation *ddr;
1023 bool check_interleaving = true;
1025 if (vect_print_dump_info (REPORT_DETAILS))
1026 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1028 /* We allow interleaving only if there are no store-store and load-store
1029 dependencies in the loop. */
1030 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1032 if (vect_check_dependences (ddr))
1034 check_interleaving = false;
/* Second pass: analyze each dependence relation in detail, allowing
   interleaving detection only if the first pass found no store/load
   dependences.  */
1039 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1040 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, check_interleaving))
1047 /* Function vect_compute_data_ref_alignment
1049 Compute the misalignment of the data reference DR.
1052 1. If during the misalignment computation it is found that the data reference
1053 cannot be vectorized then false is returned.
1054 2. DR_MISALIGNMENT (DR) is defined.
1056 FOR NOW: No analysis is actually performed. Misalignment is calculated
1057 only for trivial cases. TODO. */
1060 vect_compute_data_ref_alignment (struct data_reference *dr)
/* NOTE(review): braces, `return's and some declarations (`vectype',
   `misalign', `base_aligned') are missing from this excerpt.  */
1062 tree stmt = DR_STMT (dr);
1063 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1064 tree ref = DR_REF (dr);
1066 tree base, base_addr;
1069 tree aligned_to, alignment;
1071 if (vect_print_dump_info (REPORT_DETAILS))
1072 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1074 /* Initialize misalignment to unknown. */
1075 DR_MISALIGNMENT (dr) = -1;
1077 misalign = DR_OFFSET_MISALIGNMENT (dr);
1078 aligned_to = DR_ALIGNED_TO (dr);
1079 base_addr = DR_BASE_ADDRESS (dr);
1080 base = build_fold_indirect_ref (base_addr);
1081 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment, in bytes, of the vector type of this statement.  */
1082 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1084 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1087 if (vect_print_dump_info (REPORT_DETAILS))
1089 fprintf (vect_dump, "Unknown alignment for access: ");
1090 print_generic_expr (vect_dump, base, TDF_SLIM);
/* Decide whether the base object is known to be sufficiently aligned,
   either from the decl's alignment or from the pointed-to type.  */
1096 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1098 || (TREE_CODE (base_addr) == SSA_NAME
1099 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1100 TREE_TYPE (base_addr)))),
1102 base_aligned = true;
1104 base_aligned = false;
1108 /* Do not change the alignment of global variables if
1109 flag_section_anchors is enabled. */
1110 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1111 || (TREE_STATIC (base) && flag_section_anchors))
1113 if (vect_print_dump_info (REPORT_DETAILS))
1115 fprintf (vect_dump, "can't force alignment of ref: ");
1116 print_generic_expr (vect_dump, ref, TDF_SLIM);
1121 /* Force the alignment of the decl.
1122 NOTE: This is the only change to the code we make during
1123 the analysis phase, before deciding to vectorize the loop. */
1124 if (vect_print_dump_info (REPORT_DETAILS))
1125 fprintf (vect_dump, "force alignment");
1126 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1127 DECL_USER_ALIGN (base) = 1;
1130 /* At this point we assume that the base is aligned. */
1131 gcc_assert (base_aligned
1132 || (TREE_CODE (base) == VAR_DECL
1133 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1135 /* Modulo alignment. */
1136 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1138 if (!host_integerp (misalign, 1))
1140 /* Negative or overflowed misalignment value. */
1141 if (vect_print_dump_info (REPORT_DETAILS))
1142 fprintf (vect_dump, "unexpected misalign value");
/* Record the now-known misalignment, in bytes.  */
1146 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1148 if (vect_print_dump_info (REPORT_DETAILS))
1150 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1151 print_generic_expr (vect_dump, ref, TDF_SLIM);
1158 /* Function vect_compute_data_refs_alignment
1160 Compute the misalignment of data references in the loop.
1161 Return FALSE if a data reference is found that cannot be vectorized. */
1164 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
/* NOTE(review): the declaration of `i', braces and the `return' statements
   are missing from this excerpt.  Visible logic: compute the misalignment
   of every data reference in the loop, failing on the first one that
   vect_compute_data_ref_alignment rejects.  */
1166 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1167 struct data_reference *dr;
1170 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1171 if (!vect_compute_data_ref_alignment (dr))
1178 /* Function vect_update_misalignment_for_peel
1180 DR - the data reference whose misalignment is to be adjusted.
1181 DR_PEEL - the data reference whose misalignment is being made
1182 zero in the vector loop by the peel.
1183 NPEEL - the number of iterations in the peel loop if the misalignment
1184 of DR_PEEL is known at compile time. */
1187 vect_update_misalignment_for_peel (struct data_reference *dr,
1188 struct data_reference *dr_peel, int npeel)
1191 VEC(dr_p,heap) *same_align_drs;
1192 struct data_reference *current_dr;
1193 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1194 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1195 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1196 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1198 /* For interleaved data accesses the step in the loop must be multiplied by
1199 the size of the interleaving group. */
1200 if (DR_GROUP_FIRST_DR (stmt_info))
1201 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1202 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1203 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1205 if (known_alignment_for_access_p (dr)
1206 && known_alignment_for_access_p (dr_peel)
1207 && (DR_MISALIGNMENT (dr) / dr_size ==
1208 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1210 DR_MISALIGNMENT (dr) = 0;
1214 /* It can be assumed that the data refs with the same alignment as dr_peel
1215 are aligned in the vector loop. */
1217 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1218 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1220 if (current_dr != dr)
1222 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1223 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1224 DR_MISALIGNMENT (dr) = 0;
1228 if (known_alignment_for_access_p (dr)
1229 && known_alignment_for_access_p (dr_peel))
1231 DR_MISALIGNMENT (dr) += npeel * dr_size;
1232 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1236 if (vect_print_dump_info (REPORT_DETAILS))
1237 fprintf (vect_dump, "Setting misalignment to -1.");
1238 DR_MISALIGNMENT (dr) = -1;
1242 /* Function vect_verify_datarefs_alignment
1244 Return TRUE if all data references in the loop can be
1245 handled with respect to alignment. */
1248 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1250 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1251 struct data_reference *dr;
1252 enum dr_alignment_support supportable_dr_alignment;
1255 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1257 tree stmt = DR_STMT (dr);
1258 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1260 /* For interleaving, only the alignment of the first access matters. */
1261 if (DR_GROUP_FIRST_DR (stmt_info)
1262 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1265 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1266 if (!supportable_dr_alignment)
1268 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1270 if (DR_IS_READ (dr))
1272 "not vectorized: unsupported unaligned load.");
1275 "not vectorized: unsupported unaligned store.");
1279 if (supportable_dr_alignment != dr_aligned
1280 && vect_print_dump_info (REPORT_ALIGNMENT))
1281 fprintf (vect_dump, "Vectorizing an unaligned access.");
1287 /* Function vect_enhance_data_refs_alignment
1289 This pass will use loop versioning and loop peeling in order to enhance
1290 the alignment of data references in the loop.
1292 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1293 original loop is to be vectorized; Any other loops that are created by
1294 the transformations performed in this pass - are not supposed to be
1295 vectorized. This restriction will be relaxed.
1297 This pass will require a cost model to guide it whether to apply peeling
1298 or versioning or a combination of the two. For example, the scheme that
1299 intel uses when given a loop with several memory accesses, is as follows:
1300 choose one memory access ('p') which alignment you want to force by doing
1301 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1302 other accesses are not necessarily aligned, or (2) use loop versioning to
1303 generate one loop in which all accesses are aligned, and another loop in
1304 which only 'p' is necessarily aligned.
1306 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1307 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1308 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1310 Devising a cost model is the most critical aspect of this work. It will
1311 guide us on which access to peel for, whether to use loop versioning, how
1312 many versions to create, etc. The cost model will probably consist of
1313 generic considerations as well as target specific considerations (on
1314 powerpc for example, misaligned stores are more painful than misaligned
1317 Here are the general steps involved in alignment enhancements:
1319 -- original loop, before alignment analysis:
1320 for (i=0; i<N; i++){
1321 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1322 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1325 -- After vect_compute_data_refs_alignment:
1326 for (i=0; i<N; i++){
1327 x = q[i]; # DR_MISALIGNMENT(q) = 3
1328 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1331 -- Possibility 1: we do loop versioning:
1333 for (i=0; i<N; i++){ # loop 1A
1334 x = q[i]; # DR_MISALIGNMENT(q) = 3
1335 p[i] = y; # DR_MISALIGNMENT(p) = 0
1339 for (i=0; i<N; i++){ # loop 1B
1340 x = q[i]; # DR_MISALIGNMENT(q) = 3
1341 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1345 -- Possibility 2: we do loop peeling:
1346 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1350 for (i = 3; i < N; i++){ # loop 2A
1351 x = q[i]; # DR_MISALIGNMENT(q) = 0
1352 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1355 -- Possibility 3: combination of loop peeling and versioning:
1356 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1361 for (i = 3; i<N; i++){ # loop 3A
1362 x = q[i]; # DR_MISALIGNMENT(q) = 0
1363 p[i] = y; # DR_MISALIGNMENT(p) = 0
1367 for (i = 3; i<N; i++){ # loop 3B
1368 x = q[i]; # DR_MISALIGNMENT(q) = 0
1369 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1373 These loops are later passed to loop_transform to be vectorized. The
1374 vectorizer will use the alignment information to guide the transformation
1375 (whether to generate regular loads/stores, or with special handling for
1379 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1381 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1382 enum dr_alignment_support supportable_dr_alignment;
1383 struct data_reference *dr0 = NULL;
1384 struct data_reference *dr;
1386 bool do_peeling = false;
1387 bool do_versioning = false;
1390 stmt_vec_info stmt_info;
1392 if (vect_print_dump_info (REPORT_DETAILS))
1393 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1395 /* While cost model enhancements are expected in the future, the high level
1396 view of the code at this time is as follows:
1398 A) If there is a misaligned write then see if peeling to align this write
1399 can make all data references satisfy vect_supportable_dr_alignment.
1400 If so, update data structures as needed and return true. Note that
1401 at this time vect_supportable_dr_alignment is known to return false
1402 for a a misaligned write.
1404 B) If peeling wasn't possible and there is a data reference with an
1405 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1406 then see if loop versioning checks can be used to make all data
1407 references satisfy vect_supportable_dr_alignment. If so, update
1408 data structures as needed and return true.
1410 C) If neither peeling nor versioning were successful then return false if
1411 any data reference does not satisfy vect_supportable_dr_alignment.
1413 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1415 Note, Possibility 3 above (which is peeling and versioning together) is not
1416 being done at this time. */
1418 /* (1) Peeling to force alignment. */
1420 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1422 + How many accesses will become aligned due to the peeling
1423 - How many accesses will become unaligned due to the peeling,
1424 and the cost of misaligned accesses.
1425 - The cost of peeling (the extra runtime checks, the increase
1428 The scheme we use FORNOW: peel to force the alignment of the first
1429 misaligned store in the loop.
1430 Rationale: misaligned stores are not yet supported.
1432 TODO: Use a cost model. */
1434 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1436 stmt = DR_STMT (dr);
1437 stmt_info = vinfo_for_stmt (stmt);
1439 /* For interleaving, only the alignment of the first access
1441 if (DR_GROUP_FIRST_DR (stmt_info)
1442 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1445 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1447 if (DR_GROUP_FIRST_DR (stmt_info))
1449 /* For interleaved access we peel only if number of iterations in
1450 the prolog loop ({VF - misalignment}), is a multiple of the
1451 number of the interleaved accesses. */
1452 int elem_size, mis_in_elements;
1453 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1455 /* FORNOW: handle only known alignment. */
1456 if (!known_alignment_for_access_p (dr))
1462 elem_size = UNITS_PER_SIMD_WORD / vf;
1463 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1465 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1477 /* Often peeling for alignment will require peeling for loop-bound, which in
1478 turn requires that we know how to adjust the loop ivs after the loop. */
1479 if (!vect_can_advance_ivs_p (loop_vinfo))
1487 if (known_alignment_for_access_p (dr0))
1489 /* Since it's known at compile time, compute the number of iterations
1490 in the peeled loop (the peeling factor) for use in updating
1491 DR_MISALIGNMENT values. The peeling factor is the vectorization
1492 factor minus the misalignment as an element count. */
1493 mis = DR_MISALIGNMENT (dr0);
1494 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1495 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1497 /* For interleaved data access every iteration accesses all the
1498 members of the group, therefore we divide the number of iterations
1499 by the group size. */
1500 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1501 if (DR_GROUP_FIRST_DR (stmt_info))
1502 npeel /= DR_GROUP_SIZE (stmt_info);
1504 if (vect_print_dump_info (REPORT_DETAILS))
1505 fprintf (vect_dump, "Try peeling by %d", npeel);
1508 /* Ensure that all data refs can be vectorized after the peel. */
1509 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1511 int save_misalignment;
1516 stmt = DR_STMT (dr);
1517 stmt_info = vinfo_for_stmt (stmt);
1518 /* For interleaving, only the alignment of the first access
1520 if (DR_GROUP_FIRST_DR (stmt_info)
1521 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1524 save_misalignment = DR_MISALIGNMENT (dr);
1525 vect_update_misalignment_for_peel (dr, dr0, npeel);
1526 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1527 DR_MISALIGNMENT (dr) = save_misalignment;
1529 if (!supportable_dr_alignment)
1538 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1539 If the misalignment of DR_i is identical to that of dr0 then set
1540 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1541 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1542 by the peeling factor times the element size of DR_i (MOD the
1543 vectorization factor times the size). Otherwise, the
1544 misalignment of DR_i must be set to unknown. */
1545 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1547 vect_update_misalignment_for_peel (dr, dr0, npeel);
1549 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1550 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1551 DR_MISALIGNMENT (dr0) = 0;
1552 if (vect_print_dump_info (REPORT_ALIGNMENT))
1553 fprintf (vect_dump, "Alignment of access forced using peeling.");
1555 if (vect_print_dump_info (REPORT_DETAILS))
1556 fprintf (vect_dump, "Peeling for alignment will be applied.");
1558 stat = vect_verify_datarefs_alignment (loop_vinfo);
1565 /* (2) Versioning to force alignment. */
1567 /* Try versioning if:
1568 1) flag_tree_vect_loop_version is TRUE
1569 2) optimize_size is FALSE
1570 3) there is at least one unsupported misaligned data ref with an unknown
1572 4) all misaligned data refs with a known misalignment are supported, and
1573 5) the number of runtime alignment checks is within reason. */
1575 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1579 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1581 stmt = DR_STMT (dr);
1582 stmt_info = vinfo_for_stmt (stmt);
1584 /* For interleaving, only the alignment of the first access
1586 if (aligned_access_p (dr)
1587 || (DR_GROUP_FIRST_DR (stmt_info)
1588 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1591 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1593 if (!supportable_dr_alignment)
1599 if (known_alignment_for_access_p (dr)
1600 || VEC_length (tree,
1601 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1602 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1604 do_versioning = false;
1608 stmt = DR_STMT (dr);
1609 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1610 gcc_assert (vectype);
1612 /* The rightmost bits of an aligned address must be zeros.
1613 Construct the mask needed for this test. For example,
1614 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1615 mask must be 15 = 0xf. */
1616 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1618 /* FORNOW: use the same mask to test all potentially unaligned
1619 references in the loop. The vectorizer currently supports
1620 a single vector size, see the reference to
1621 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1622 vectorization factor is computed. */
1623 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1624 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1625 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1626 VEC_safe_push (tree, heap,
1627 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1632 /* Versioning requires at least one misaligned data reference. */
1633 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1634 do_versioning = false;
1635 else if (!do_versioning)
1636 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1641 VEC(tree,heap) *may_misalign_stmts
1642 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1645 /* It can now be assumed that the data references in the statements
1646 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1647 of the loop being vectorized. */
1648 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1651 dr = STMT_VINFO_DATA_REF (stmt_info);
1652 DR_MISALIGNMENT (dr) = 0;
1653 if (vect_print_dump_info (REPORT_ALIGNMENT))
1654 fprintf (vect_dump, "Alignment of access forced using versioning.");
1657 if (vect_print_dump_info (REPORT_DETAILS))
1658 fprintf (vect_dump, "Versioning for alignment will be applied.");
1660 /* Peeling and versioning can't be done together at this time. */
1661 gcc_assert (! (do_peeling && do_versioning));
1663 stat = vect_verify_datarefs_alignment (loop_vinfo);
1668 /* This point is reached if neither peeling nor versioning is being done. */
1669 gcc_assert (! (do_peeling || do_versioning));
1671 stat = vect_verify_datarefs_alignment (loop_vinfo);
1676 /* Function vect_analyze_data_refs_alignment
1678 Analyze the alignment of the data-references in the loop.
1679 Return FALSE if a data reference is found that cannot be vectorized. */
1682 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1684 if (vect_print_dump_info (REPORT_DETAILS))
1685 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1687 if (!vect_compute_data_refs_alignment (loop_vinfo))
1689 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1691 "not vectorized: can't calculate alignment for data ref.");
1699 /* Function vect_analyze_data_ref_access.
1701 Analyze the access pattern of the data-reference DR. For now, a data access
1702 has to be consecutive to be considered vectorizable. */
1705 vect_analyze_data_ref_access (struct data_reference *dr)
1707 tree step = DR_STEP (dr);
1708 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1709 tree scalar_type = TREE_TYPE (DR_REF (dr));
1710 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1711 tree stmt = DR_STMT (dr);
1712 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1713 interleaving group (including gaps). */
1714 HOST_WIDE_INT stride = dr_step / type_size;
1718 if (vect_print_dump_info (REPORT_DETAILS))
1719 fprintf (vect_dump, "bad data-ref access");
1724 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1726 /* Mark that it is not interleaving. */
1727 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1731 /* Not consecutive access is possible only if it is a part of interleaving. */
1732 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1734 /* Check if it this DR is a part of interleaving, and is a single
1735 element of the group that is accessed in the loop. */
1737 /* Gaps are supported only for loads. STEP must be a multiple of the type
1738 size. The size of the group must be a power of 2. */
1740 && (dr_step % type_size) == 0
1742 && exact_log2 (stride) != -1)
1744 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1745 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1746 if (vect_print_dump_info (REPORT_DR_DETAILS))
1748 fprintf (vect_dump, "Detected single element interleaving %d ",
1749 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1750 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1751 fprintf (vect_dump, " step ");
1752 print_generic_expr (vect_dump, step, TDF_SLIM);
1756 if (vect_print_dump_info (REPORT_DETAILS))
1757 fprintf (vect_dump, "not consecutive access");
1761 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1763 /* First stmt in the interleaving chain. Check the chain. */
1764 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1765 struct data_reference *data_ref = dr;
1766 unsigned int count = 1;
1768 tree prev_init = DR_INIT (data_ref);
1770 HOST_WIDE_INT diff, count_in_bytes;
1774 /* Skip same data-refs. In case that two or more stmts share data-ref
1775 (supported only for loads), we vectorize only the first stmt, and
1776 the rest get their vectorized loads from the the first one. */
1777 if (!tree_int_cst_compare (DR_INIT (data_ref),
1778 DR_INIT (STMT_VINFO_DATA_REF (
1779 vinfo_for_stmt (next)))))
1781 /* For load use the same data-ref load. (We check in
1782 vect_check_dependences() that there are no two stores to the
1784 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1787 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1792 /* Check that all the accesses have the same STEP. */
1793 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1794 if (tree_int_cst_compare (step, next_step))
1796 if (vect_print_dump_info (REPORT_DETAILS))
1797 fprintf (vect_dump, "not consecutive access in interleaving");
1801 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1802 /* Check that the distance between two accesses is equal to the type
1803 size. Otherwise, we have gaps. */
1804 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1805 - TREE_INT_CST_LOW (prev_init)) / type_size;
1806 if (!DR_IS_READ (data_ref) && diff != 1)
1808 if (vect_print_dump_info (REPORT_DETAILS))
1809 fprintf (vect_dump, "interleaved store with gaps");
1812 /* Store the gap from the previous member of the group. If there is no
1813 gap in the access, DR_GROUP_GAP is always 1. */
1814 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1816 prev_init = DR_INIT (data_ref);
1817 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1818 /* Count the number of data-refs in the chain. */
1822 /* COUNT is the number of accesses found, we multiply it by the size of
1823 the type to get COUNT_IN_BYTES. */
1824 count_in_bytes = type_size * count;
1826 /* Check that the size of the interleaving is not greater than STEP. */
1827 if (dr_step < count_in_bytes)
1829 if (vect_print_dump_info (REPORT_DETAILS))
1831 fprintf (vect_dump, "interleaving size is greater than step for ");
1832 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1837 /* Check that the size of the interleaving is equal to STEP for stores,
1838 i.e., that there are no gaps. */
1839 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1841 if (vect_print_dump_info (REPORT_DETAILS))
1842 fprintf (vect_dump, "interleaved store with gaps");
1846 /* Check that STEP is a multiple of type size. */
1847 if ((dr_step % type_size) != 0)
1849 if (vect_print_dump_info (REPORT_DETAILS))
1851 fprintf (vect_dump, "step is not a multiple of type size: step ");
1852 print_generic_expr (vect_dump, step, TDF_SLIM);
1853 fprintf (vect_dump, " size ");
1854 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1860 /* FORNOW: we handle only interleaving that is a power of 2. */
1861 if (exact_log2 (stride) == -1)
1863 if (vect_print_dump_info (REPORT_DETAILS))
1864 fprintf (vect_dump, "interleaving is not a power of 2");
1867 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1873 /* Function vect_analyze_data_ref_accesses.
1875 Analyze the access pattern of all the data references in the loop.
1877 FORNOW: the only access pattern that is considered vectorizable is a
1878 simple step 1 (consecutive) access.
1880 FORNOW: handle only arrays and pointer accesses. */
1883 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1886 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1887 struct data_reference *dr;
1889 if (vect_print_dump_info (REPORT_DETAILS))
1890 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1892 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1893 if (!vect_analyze_data_ref_access (dr))
1895 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1896 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1904 /* Function vect_analyze_data_refs.
1906 Find all the data references in the loop.
1908 The general structure of the analysis of data refs in the vectorizer is as
1910 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1911 find and analyze all data-refs in the loop and their dependences.
1912 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1913 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1914 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1919 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1921 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1923 VEC (data_reference_p, heap) *datarefs;
1924 struct data_reference *dr;
1927 if (vect_print_dump_info (REPORT_DETAILS))
1928 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1930 compute_data_dependences_for_loop (loop, true,
1931 &LOOP_VINFO_DATAREFS (loop_vinfo),
1932 &LOOP_VINFO_DDRS (loop_vinfo));
1934 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1935 from stmt_vec_info struct to DR and vectype. */
1936 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1938 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1941 stmt_vec_info stmt_info;
1943 if (!dr || !DR_REF (dr))
1945 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1946 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1950 /* Update DR field in stmt_vec_info struct. */
1951 stmt = DR_STMT (dr);
1952 stmt_info = vinfo_for_stmt (stmt);
1954 if (STMT_VINFO_DATA_REF (stmt_info))
1956 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1959 "not vectorized: more than one data ref in stmt: ");
1960 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1964 STMT_VINFO_DATA_REF (stmt_info) = dr;
1966 /* Check that analysis of the data-ref succeeded. */
1967 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1970 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1972 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1973 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1977 if (!DR_MEMTAG (dr))
1979 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1981 fprintf (vect_dump, "not vectorized: no memory tag for ");
1982 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1987 /* Set vectype for STMT. */
1988 scalar_type = TREE_TYPE (DR_REF (dr));
1989 STMT_VINFO_VECTYPE (stmt_info) =
1990 get_vectype_for_scalar_type (scalar_type);
1991 if (!STMT_VINFO_VECTYPE (stmt_info))
1993 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1996 "not vectorized: no vectype for stmt: ");
1997 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1998 fprintf (vect_dump, " scalar_type: ");
1999 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
/* Utility functions used by vect_mark_stmts_to_be_vectorized.  */
2011 /* Function vect_mark_relevant.
2013 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2016 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2017 enum vect_relevant relevant, bool live_p)
2019 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2020 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2021 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2023 if (vect_print_dump_info (REPORT_DETAILS))
2024 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2026 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2030 /* This is the last stmt in a sequence that was detected as a
2031 pattern that can potentially be vectorized. Don't mark the stmt
2032 as relevant/live because it's not going to vectorized.
2033 Instead mark the pattern-stmt that replaces it. */
2034 if (vect_print_dump_info (REPORT_DETAILS))
2035 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2036 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2037 stmt_info = vinfo_for_stmt (pattern_stmt);
2038 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2039 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2040 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2041 stmt = pattern_stmt;
2044 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2045 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2046 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2048 if (TREE_CODE (stmt) == PHI_NODE)
2049 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2050 or live will fail vectorization later on. */
2053 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2054 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2056 if (vect_print_dump_info (REPORT_DETAILS))
2057 fprintf (vect_dump, "already marked relevant/live.");
2061 VEC_safe_push (tree, heap, *worklist, stmt);
2065 /* Function vect_stmt_relevant_p.
2067 Return true if STMT in loop that is represented by LOOP_VINFO is
2068 "relevant for vectorization".
2070 A stmt is considered "relevant for vectorization" if:
2071 - it has uses outside the loop.
2072 - it has vdefs (it alters memory).
2073 - control stmts in the loop (except for the exit condition).
2075 CHECKME: what other side effects would the vectorizer allow? */
2078 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2079 enum vect_relevant *relevant, bool *live_p)
2081 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2082 ssa_op_iter op_iter;
2083 imm_use_iterator imm_iter;
2084 use_operand_p use_p;
2085 def_operand_p def_p;
2087 *relevant = vect_unused_in_loop;
2090 /* cond stmt other than loop exit cond. */
2091 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2092 *relevant = vect_used_in_loop;
2094 /* changing memory. */
2095 if (TREE_CODE (stmt) != PHI_NODE)
2096 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2098 if (vect_print_dump_info (REPORT_DETAILS))
2099 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2100 *relevant = vect_used_in_loop;
2103 /* uses outside the loop. */
2104 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2106 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2108 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2109 if (!flow_bb_inside_loop_p (loop, bb))
2111 if (vect_print_dump_info (REPORT_DETAILS))
2112 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2114 /* We expect all such uses to be in the loop exit phis
2115 (because of loop closed form) */
2116 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2117 gcc_assert (bb == single_exit (loop)->dest);
2124 return (*live_p || *relevant);
2128 /* Function vect_mark_stmts_to_be_vectorized.
2130 Not all stmts in the loop need to be vectorized. For example:
2139 Stmt 1 and 3 do not need to be vectorized, because loop control and
2140 addressing of vectorized data-refs are handled differently.
2142 This pass detects such stmts. */
2145 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2147 VEC(tree,heap) *worklist;
2148 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2149 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2150 unsigned int nbbs = loop->num_nodes;
2151 block_stmt_iterator si;
2156 stmt_vec_info stmt_vinfo;
2160 enum vect_relevant relevant;
2162 enum vect_def_type dt;
2164 if (vect_print_dump_info (REPORT_DETAILS))
2165 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2167 worklist = VEC_alloc (tree, heap, 64);
2169 /* 1. Init worklist. */
2172 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2174 if (vect_print_dump_info (REPORT_DETAILS))
2176 fprintf (vect_dump, "init: phi relevant? ");
2177 print_generic_expr (vect_dump, phi, TDF_SLIM);
2180 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2181 vect_mark_relevant (&worklist, phi, relevant, live_p);
2184 for (i = 0; i < nbbs; i++)
2187 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2189 stmt = bsi_stmt (si);
2191 if (vect_print_dump_info (REPORT_DETAILS))
2193 fprintf (vect_dump, "init: stmt relevant? ");
2194 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2197 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2198 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2203 /* 2. Process_worklist */
2205 while (VEC_length (tree, worklist) > 0)
2207 stmt = VEC_pop (tree, worklist);
2209 if (vect_print_dump_info (REPORT_DETAILS))
2211 fprintf (vect_dump, "worklist: examine stmt: ");
2212 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2215 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2216 in the loop, mark the stmt that defines it (DEF_STMT) as
2217 relevant/irrelevant and live/dead according to the liveness and
2218 relevance properties of STMT.
2221 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2223 ann = stmt_ann (stmt);
2224 stmt_vinfo = vinfo_for_stmt (stmt);
2226 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2227 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2229 /* Generally, the liveness and relevance properties of STMT are
2230 propagated to the DEF_STMTs of its USEs:
2231 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2232 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2237 If USE is used only for address computations (e.g. array indexing),
2238 which does not need to be directly vectorized, then the
2239 liveness/relevance of the respective DEF_STMT is left unchanged.
2242 If STMT has been identified as defining a reduction variable, then
2245 The last use of STMT is the reduction-variable, which is defined
2246 by a loop-header-phi. We don't want to mark the phi as live or
2247 relevant (because it does not need to be vectorized, it is handled
2248 as part of the vectorization of the reduction), so in this case we
2249 skip the call to vect_mark_relevant.
2251 The rest of the uses of STMT are defined in the loop body. For
2252 the def_stmt of these uses we want to set liveness/relevance
2254 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2255 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2256 because even though STMT is classified as live (since it defines a
2257 value that is used across loop iterations) and irrelevant (since it
2258 is not used inside the loop), it will be vectorized, and therefore
2259 the corresponding DEF_STMTs need to marked as relevant.
2260 We distinguish between two kinds of relevant stmts - those that are
2261 used by a reduction computation, and those that are (also) used by
2262 a regular computation. This allows us later on to identify stmts
2263 that are used solely by a reduction, and therefore the order of
2264 the results that they produce does not have to be kept.
2268 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2270 gcc_assert (relevant == vect_unused_in_loop && live_p);
2271 relevant = vect_used_by_reduction;
2275 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2277 /* case 1: we are only interested in uses that need to be vectorized.
2278 Uses that are used for address computation are not considered
2281 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2284 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2286 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2287 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2288 VEC_free (tree, heap, worklist);
2292 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2295 if (vect_print_dump_info (REPORT_DETAILS))
2297 fprintf (vect_dump, "worklist: examine use %d: ", i);
2298 print_generic_expr (vect_dump, use, TDF_SLIM);
2301 bb = bb_for_stmt (def_stmt);
2302 if (!flow_bb_inside_loop_p (loop, bb))
2305 /* case 2.1: the reduction-use does not mark the defining-phi
2307 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2308 && TREE_CODE (def_stmt) == PHI_NODE)
2311 vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
2313 } /* while worklist */
2315 VEC_free (tree, heap, worklist);
2320 /* Function vect_can_advance_ivs_p
2322 In case the number of iterations that LOOP iterates is unknown at compile
2323 time, an epilog loop will be generated, and the loop induction variables
2324 (IVs) will be "advanced" to the value they are supposed to take just before
2325 the epilog loop. Here we check that the access function of the loop IVs
2326 and the expression that represents the loop bound are simple enough.
2327 These restrictions will be relaxed in the future. */
/* NOTE(review): this is a sampled listing -- the return-type line, most
   braces, the "continue"s after the skip messages, and the final
   return true/false statements of this function are not visible here.  */
2330 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2332 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2333 basic_block bb = loop->header;
2336 /* Analyze phi functions of the loop header. */
2338 if (vect_print_dump_info (REPORT_DETAILS))
2339 fprintf (vect_dump, "vect_can_advance_ivs_p:");
/* Walk every phi in the loop header; each candidate IV is defined by
   one of these phis.  */
2341 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2343 tree access_fn = NULL;
2344 tree evolution_part;
2346 if (vect_print_dump_info (REPORT_DETAILS))
2348 fprintf (vect_dump, "Analyze phi: ");
2349 print_generic_expr (vect_dump, phi, TDF_SLIM);
2352 /* Skip virtual phi's. The data dependences that are associated with
2353 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2355 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2357 if (vect_print_dump_info (REPORT_DETAILS))
2358 fprintf (vect_dump, "virtual phi. skip.")
2362 /* Skip reduction phis. */
2364 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2366 if (vect_print_dump_info (REPORT_DETAILS))
2367 fprintf (vect_dump, "reduc phi. skip.")
2371 /* Analyze the evolution function. */
/* ACCESS_FN is the scalar evolution of the phi result in LOOP, with
   parameters defined in outer scopes instantiated.  The guard that
   rejects a NULL access_fn (producing the "No Access function" dump
   below) is not visible in this excerpt.  */
2373 access_fn = instantiate_parameters
2374 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "No Access function.");
2383 if (vect_print_dump_info (REPORT_DETAILS))
2385 fprintf (vect_dump, "Access function of PHI: ");
2386 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* The per-iteration step of ACCESS_FN within this loop; NULL_TREE means
   the phi result does not evolve as a simple IV in LOOP.  */
2389 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2391 if (evolution_part == NULL_TREE)
2393 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "No evolution.");
2398 /* FORNOW: We do not transform initial conditions of IVs
2399 which evolution functions are a polynomial of degree >= 2. */
/* A chrec-valued step implies a polynomial evolution of degree >= 2;
   such IVs are rejected (the rejecting return is not visible here).  */
2401 if (tree_is_chrec (evolution_part))
2409 /* Function vect_get_loop_niters.
2411 Determine how many iterations the loop is executed.
2412 If an expression that represents the number of iterations
2413 can be constructed, place it in NUMBER_OF_ITERATIONS.
2414 Return the loop exit condition. */
/* NOTE(review): sampled listing -- the "static tree" return-type line,
   the declaration of NITERS, and braces are not visible in this excerpt.  */
2417 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2421 if (vect_print_dump_info (REPORT_DETAILS))
2422 fprintf (vect_dump, "=== get_loop_niters ===");
/* Ask the scalar-evolution machinery how many times the exit condition
   executes; chrec_dont_know signals an unanalyzable bound.  */
2424 niters = number_of_exit_cond_executions (loop);
2426 if (niters != NULL_TREE
2427 && niters != chrec_dont_know)
/* Only a successfully analyzed count is stored through the output
   parameter; on failure *NUMBER_OF_ITERATIONS is left untouched.  */
2429 *number_of_iterations = niters;
2431 if (vect_print_dump_info (REPORT_DETAILS))
2433 fprintf (vect_dump, "==> get_loop_niters:" );
2434 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
/* Hand back the exit condition itself; the caller
   (vect_analyze_loop_form) checks it and reports a "complicated exit
   condition" when it is unusable.  */
2438 return get_loop_exit_condition (loop);
2442 /* Function vect_analyze_loop_form.
2444 Verify the following restrictions (some may be relaxed in the future):
2445 - it's an inner-most loop
2446 - number of BBs = 2 (which are the loop header and the latch)
2447 - the loop has a pre-header
2448 - the loop has a single entry and exit
2449 - the loop exit condition is simple enough, and the number of iterations
2450 can be analyzed (a countable loop). */
/* NOTE(review): sampled listing -- the inner-loop test guarding the
   "nested loop" message, the NULL returns on each failure path, the
   declaration of LOOP_COND, and most braces are not visible here.  */
2452 static loop_vec_info
2453 vect_analyze_loop_form (struct loop *loop)
2455 loop_vec_info loop_vinfo;
2457 tree number_of_iterations = NULL;
2459 if (vect_print_dump_info (REPORT_DETAILS))
2460 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2464 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2465 fprintf (vect_dump, "not vectorized: nested loop.");
/* CFG shape check: single exit, exactly header + latch, and a header
   with exactly two predecessors (pre-header and latch).  */
2469 if (!single_exit (loop)
2470 || loop->num_nodes != 2
2471 || EDGE_COUNT (loop->header->preds) != 2)
/* Re-test each condition only to emit the most specific dump message.  */
2473 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2475 if (!single_exit (loop))
2476 fprintf (vect_dump, "not vectorized: multiple exits.");
2477 else if (loop->num_nodes != 2)
2478 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2479 else if (EDGE_COUNT (loop->header->preds) != 2)
2480 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2486 /* We assume that the loop exit condition is at the end of the loop. i.e,
2487 that the loop is represented as a do-while (with a proper if-guard
2488 before the loop if needed), where the loop header contains all the
2489 executable statements, and the latch is empty. */
2490 if (!empty_block_p (loop->latch)
2491 || phi_nodes (loop->latch))
2493 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2494 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2498 /* Make sure there exists a single-predecessor exit bb: */
2499 if (!single_pred_p (single_exit (loop)->dest))
2501 edge e = single_exit (loop);
/* A normal exit edge can be split to manufacture the required
   single-predecessor exit bb; an abnormal edge cannot be split, so
   the loop is rejected in that case.  */
2502 if (!(e->flags & EDGE_ABNORMAL))
2504 split_loop_exit_edge (e);
2505 if (vect_print_dump_info (REPORT_DETAILS))
2506 fprintf (vect_dump, "split exit edge.");
2510 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2511 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2516 if (empty_block_p (loop->header))
2518 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2519 fprintf (vect_dump, "not vectorized: empty loop.");
/* Countability: obtain the exit condition and, if analyzable, the
   iteration-count expression (see vect_get_loop_niters above).  */
2523 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2526 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2527 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2531 if (!number_of_iterations)
2533 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2535 "not vectorized: number of iterations cannot be computed.");
2539 if (chrec_contains_undetermined (number_of_iterations))
2541 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2542 fprintf (vect_dump, "Infinite number of iterations.");
/* All form checks passed: build the loop_vec_info and record the
   iteration count and exit condition for the later analysis phases.  */
2546 loop_vinfo = new_loop_vec_info (loop);
2547 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2549 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2551 if (vect_print_dump_info (REPORT_DETAILS))
2553 fprintf (vect_dump, "Symbolic number of iterations is ");
2554 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
/* Presumably reached only when the iteration count is a compile-time
   constant (the guarding "else if" is not visible in this excerpt --
   TODO confirm): a constant count of zero is rejected.  */
2558 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2560 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2561 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2565 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2571 /* Function vect_analyze_loop.
2573 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2574 for it. The different analyses will record information in the
2575 loop_vec_info struct. */
2577 vect_analyze_loop (struct loop *loop)
2580 loop_vec_info loop_vinfo;
2582 if (vect_print_dump_info (REPORT_DETAILS))
2583 fprintf (vect_dump, "===== analyze_loop_nest =====");
2585 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2587 loop_vinfo = vect_analyze_loop_form (loop);
2590 if (vect_print_dump_info (REPORT_DETAILS))
2591 fprintf (vect_dump, "bad loop form.");
2595 /* Find all data references in the loop (which correspond to vdefs/vuses)
2596 and analyze their evolution in the loop.
2598 FORNOW: Handle only simple, array references, which
2599 alignment can be forced, and aligned pointer-references. */
2601 ok = vect_analyze_data_refs (loop_vinfo);
2604 if (vect_print_dump_info (REPORT_DETAILS))
2605 fprintf (vect_dump, "bad data references.");
2606 destroy_loop_vec_info (loop_vinfo);
2610 /* Classify all cross-iteration scalar data-flow cycles.
2611 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2613 vect_analyze_scalar_cycles (loop_vinfo);
2615 vect_pattern_recog (loop_vinfo);
2617 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2619 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2622 if (vect_print_dump_info (REPORT_DETAILS))
2623 fprintf (vect_dump, "unexpected pattern.");
2624 destroy_loop_vec_info (loop_vinfo);
2628 /* Analyze the alignment of the data-refs in the loop.
2629 Fail if a data reference is found that cannot be vectorized. */
2631 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2634 if (vect_print_dump_info (REPORT_DETAILS))
2635 fprintf (vect_dump, "bad data alignment.");
2636 destroy_loop_vec_info (loop_vinfo);
2640 ok = vect_determine_vectorization_factor (loop_vinfo);
2643 if (vect_print_dump_info (REPORT_DETAILS))
2644 fprintf (vect_dump, "can't determine vectorization factor.");
2645 destroy_loop_vec_info (loop_vinfo);
2649 /* Analyze data dependences between the data-refs in the loop.
2650 FORNOW: fail at the first data dependence that we encounter. */
2652 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2655 if (vect_print_dump_info (REPORT_DETAILS))
2656 fprintf (vect_dump, "bad data dependence.");
2657 destroy_loop_vec_info (loop_vinfo);
2661 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2662 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2664 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2667 if (vect_print_dump_info (REPORT_DETAILS))
2668 fprintf (vect_dump, "bad data access.");
2669 destroy_loop_vec_info (loop_vinfo);
2673 /* This pass will decide on using loop versioning and/or loop peeling in
2674 order to enhance the alignment of data references in the loop. */
2676 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2679 if (vect_print_dump_info (REPORT_DETAILS))
2680 fprintf (vect_dump, "bad data alignment.");
2681 destroy_loop_vec_info (loop_vinfo);
2685 /* Scan all the operations in the loop and make sure they are
2688 ok = vect_analyze_operations (loop_vinfo);
2691 if (vect_print_dump_info (REPORT_DETAILS))
2692 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2693 destroy_loop_vec_info (loop_vinfo);
2697 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;