1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
/* NOTE(review): this extract is incomplete — the embedded original line
   numbers skip, so braces, else-branches and return statements of this
   function are not visible here.  Comments only were added below.  */
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
103 if (vect_print_dump_info (REPORT_DETAILS))
104 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
/* Walk every statement of every basic block in the loop and compute the
   widest number of vector subparts (nunits) seen; that maximum becomes
   the loop's vectorization factor.  */
106 for (i = 0; i < nbbs; i++)
108 basic_block bb = bbs[i];
110 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
112 tree stmt = bsi_stmt (si);
114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining statement: ");
120 print_generic_expr (vect_dump, stmt, TDF_SLIM);
123 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
126 gcc_assert (stmt_info);
128 /* skip stmts which do not need to be vectorized. */
129 if (!STMT_VINFO_RELEVANT_P (stmt_info)
130 && !STMT_VINFO_LIVE_P (stmt_info))
132 if (vect_print_dump_info (REPORT_DETAILS))
133 fprintf (vect_dump, "skip.");
/* A statement whose type is already a vector type means the user wrote
   explicit vector code in the loop — bail out of vectorization.  */
137 if (!GIMPLE_STMT_P (stmt)
138 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
140 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
143 print_generic_expr (vect_dump, stmt, TDF_SLIM);
148 if (STMT_VINFO_VECTYPE (stmt_info))
150 /* The only case when a vectype had been already set is for stmts
151 that contain a dataref, or for "pattern-stmts" (stmts generated
152 by the vectorizer to represent/replace a certain idiom). */
153 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
154 || is_pattern_stmt_p (stmt_info))
155 vectype = STMT_VINFO_VECTYPE (stmt_info);
159 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
160 && !is_pattern_stmt_p (stmt_info));
162 /* We set the vectype according to the type of the result (lhs).
163 For stmts whose result-type is different than the type of the
164 arguments (e.g. demotion, promotion), vectype will be reset
165 appropriately (later). Note that we have to visit the smallest
166 datatype in this function, because that determines the VF.
167 If the smallest datatype in the loop is present only as the
168 rhs of a promotion operation - we'd miss it here.
169 However, in such a case, that a variable of this datatype
170 does not appear in the lhs anywhere in the loop, it shouldn't
171 affect the vectorization factor. */
172 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
174 if (vect_print_dump_info (REPORT_DETAILS))
176 fprintf (vect_dump, "get vectype for scalar type: ");
177 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
/* Map the scalar type to the target's vector type; failure (presumably a
   NULL return — the check itself is among the elided lines) means an
   unsupported data-type and the loop is rejected.  */
180 vectype = get_vectype_for_scalar_type (scalar_type);
183 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
186 "not vectorized: unsupported data-type ");
187 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
191 STMT_VINFO_VECTYPE (stmt_info) = vectype;
194 if (vect_print_dump_info (REPORT_DETAILS))
196 fprintf (vect_dump, "vectype: ");
197 print_generic_expr (vect_dump, vectype, TDF_SLIM);
200 nunits = TYPE_VECTOR_SUBPARTS (vectype);
201 if (vect_print_dump_info (REPORT_DETAILS))
202 fprintf (vect_dump, "nunits = %d", nunits);
/* Keep the maximum nunits over all statements as the VF.  */
204 if (!vectorization_factor
205 || (nunits > vectorization_factor))
206 vectorization_factor = nunits;
211 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* A VF of 0 or 1 means no statement produced a usable vector type.  */
213 if (vectorization_factor <= 1)
215 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
216 fprintf (vect_dump, "not vectorized: unsupported data-type");
219 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
/* NOTE(review): extract incomplete (embedded line numbers skip; braces and
   returns elided).  Comments only were added.  */
225 /* Function vect_analyze_operations.
227 Scan the loop stmts and make sure they are all vectorizable. */
230 vect_analyze_operations (loop_vec_info loop_vinfo)
232 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
233 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
234 int nbbs = loop->num_nodes;
235 block_stmt_iterator si;
236 unsigned int vectorization_factor = 0;
240 stmt_vec_info stmt_info;
241 bool need_to_vectorize = false;
243 if (vect_print_dump_info (REPORT_DETAILS))
244 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* The VF must already have been computed by
   vect_determine_vectorization_factor.  */
246 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
247 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
249 for (i = 0; i < nbbs; i++)
251 basic_block bb = bbs[i];
/* First pass over the block: PHI nodes.  Live or relevant scalar PHIs
   (other than what later code recognizes) are not supported yet.  */
253 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
255 stmt_info = vinfo_for_stmt (phi);
256 if (vect_print_dump_info (REPORT_DETAILS))
258 fprintf (vect_dump, "examining phi: ");
259 print_generic_expr (vect_dump, phi, TDF_SLIM);
262 gcc_assert (stmt_info);
264 if (STMT_VINFO_LIVE_P (stmt_info))
266 /* FORNOW: not yet supported. */
267 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
268 fprintf (vect_dump, "not vectorized: value used after loop.");
272 if (STMT_VINFO_RELEVANT_P (stmt_info))
274 /* Most likely a reduction-like computation that is used
276 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
277 fprintf (vect_dump, "not vectorized: unsupported pattern.");
/* Second pass: ordinary statements of the block.  */
282 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
284 tree stmt = bsi_stmt (si);
285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
287 if (vect_print_dump_info (REPORT_DETAILS))
289 fprintf (vect_dump, "==> examining statement: ");
290 print_generic_expr (vect_dump, stmt, TDF_SLIM);
293 gcc_assert (stmt_info);
295 /* skip stmts which do not need to be vectorized.
296 this is expected to include:
297 - the COND_EXPR which is the loop exit condition
298 - any LABEL_EXPRs in the loop
299 - computations that are used only for array indexing or loop
302 if (!STMT_VINFO_RELEVANT_P (stmt_info)
303 && !STMT_VINFO_LIVE_P (stmt_info))
305 if (vect_print_dump_info (REPORT_DETAILS))
306 fprintf (vect_dump, "irrelevant.");
310 if (STMT_VINFO_RELEVANT_P (stmt_info))
312 gcc_assert (GIMPLE_STMT_P (stmt)
313 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
314 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
/* A relevant stmt must be handled by at least one of the
   vectorizable_* analyzers (called with NULL to mean
   "analyze only, do not transform").  */
316 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
317 || vectorizable_type_demotion (stmt, NULL, NULL)
318 || vectorizable_operation (stmt, NULL, NULL)
319 || vectorizable_assignment (stmt, NULL, NULL)
320 || vectorizable_load (stmt, NULL, NULL)
321 || vectorizable_call (stmt, NULL, NULL)
322 || vectorizable_store (stmt, NULL, NULL)
323 || vectorizable_condition (stmt, NULL, NULL));
327 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
330 "not vectorized: relevant stmt not supported: ");
331 print_generic_expr (vect_dump, stmt, TDF_SLIM);
335 need_to_vectorize = true;
/* Live stmts (value used after the loop) are supported only as
   reductions or other special live operations.  */
338 if (STMT_VINFO_LIVE_P (stmt_info))
340 ok = vectorizable_reduction (stmt, NULL, NULL);
343 need_to_vectorize = true;
345 ok = vectorizable_live_operation (stmt, NULL, NULL);
349 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
352 "not vectorized: live stmt not supported: ");
353 print_generic_expr (vect_dump, stmt, TDF_SLIM);
361 /* TODO: Analyze cost. Decide if worth while to vectorize. */
363 /* All operations in the loop are either irrelevant (deal with loop
364 control, or dead), or only used outside the loop and can be moved
365 out of the loop (e.g. invariants, inductions). The loop can be
366 optimized away by scalar optimizations. We're better off not
367 touching this loop. */
368 if (!need_to_vectorize)
370 if (vect_print_dump_info (REPORT_DETAILS))
372 "All the computation can be taken out of the loop.")
373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
375 "not vectorized: redundant loop. no profit to vectorize.");
379 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
380 && vect_print_dump_info (REPORT_DETAILS))
382 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
383 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Too few iterations: below VF, or below the user-tunable
   PARAM_MIN_VECT_LOOP_BOUND multiple of VF — not worth vectorizing.  */
385 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
386 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
387 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
388 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
389 * vectorization_factor))))
391 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
392 fprintf (vect_dump, "not vectorized: iteration count too small.");
/* An epilog loop is needed when the iteration count is unknown, not a
   multiple of VF, or when peeling for alignment was decided; verify we
   are able to build one.  */
396 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
397 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
398 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
400 if (vect_print_dump_info (REPORT_DETAILS))
401 fprintf (vect_dump, "epilog loop required.");
402 if (!vect_can_advance_ivs_p (loop_vinfo))
404 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
406 "not vectorized: can't create epilog loop 1.");
409 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
411 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
413 "not vectorized: can't create epilog loop 2.");
/* NOTE(review): extract incomplete (embedded line numbers skip; the return
   statements are elided).  Comments only were added.  */
422 /* Function exist_non_indexing_operands_for_use_p
424 USE is one of the uses attached to STMT. Check if USE is
425 used in STMT for anything other than indexing an array. */
428 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
431 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
433 /* USE corresponds to some operand in STMT. If there is no data
434 reference in STMT, then any operand that corresponds to USE
435 is not indexing an array. */
436 if (!STMT_VINFO_DATA_REF (stmt_info))
439 /* STMT has a data_ref. FORNOW this means that its of one of
443 (This should have been verified in analyze_data_refs).
445 'var' in the second case corresponds to a def, not a use,
446 so USE cannot correspond to any operands that are not used
449 Therefore, all we need to check is if STMT falls into the
450 first case, and whether var corresponds to USE. */
/* lhs an SSA_NAME implies a load "var = array_ref"; the rhs would then be
   the memory reference.  The comparison of the operand against USE is
   among the elided lines — presumably "operand == use".  */
452 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
455 operand = GIMPLE_STMT_OPERAND (stmt, 1);
457 if (TREE_CODE (operand) != SSA_NAME)
/* NOTE(review): extract incomplete (embedded line numbers skip).  Comments
   only were added.  */
467 /* Function vect_analyze_scalar_cycles.
469 Examine the cross iteration def-use cycles of scalar variables, by
470 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
471 following: invariant, induction, reduction, unknown.
473 Some forms of scalar cycles are not yet supported.
475 Example1: reduction: (unsupported yet)
481 Example2: induction: (unsupported yet)
487 Note: the following loop *is* vectorizable:
493 even though it has a def-use cycle caused by the induction variable i:
495 loop: i_2 = PHI (i_0, i_1)
500 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
501 it does not need to be vectorized because it is only used for array
502 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
503 loop2 on the other hand is relevant (it is being written to memory).
507 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
510 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
511 basic_block bb = loop->header;
/* PHIs that are not recognized as inductions in pass 1 are queued here
   and retried as reductions in pass 2.  */
513 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
515 if (vect_print_dump_info (REPORT_DETAILS))
516 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
518 /* First - identify all inductions. */
519 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
521 tree access_fn = NULL;
522 tree def = PHI_RESULT (phi);
523 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
525 if (vect_print_dump_info (REPORT_DETAILS))
527 fprintf (vect_dump, "Analyze phi: ");
528 print_generic_expr (vect_dump, phi, TDF_SLIM);
531 /* Skip virtual phi's. The data dependences that are associated with
532 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
533 if (!is_gimple_reg (SSA_NAME_VAR (def)))
/* Default classification until proven otherwise.  */
536 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
538 /* Analyze the evolution function. */
539 access_fn = analyze_scalar_evolution (loop, def);
540 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
542 fprintf (vect_dump, "Access function of PHI: ");
543 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* Not a simple IV evolution: defer to the reduction pass below.  */
547 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
549 VEC_safe_push (tree, heap, worklist, phi);
553 if (vect_print_dump_info (REPORT_DETAILS))
554 fprintf (vect_dump, "Detected induction.");
555 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
559 /* Second - identify all reductions. */
560 while (VEC_length (tree, worklist) > 0)
562 tree phi = VEC_pop (tree, worklist);
563 tree def = PHI_RESULT (phi);
564 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
567 if (vect_print_dump_info (REPORT_DETAILS))
569 fprintf (vect_dump, "Analyze phi: ");
570 print_generic_expr (vect_dump, phi, TDF_SLIM);
573 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
574 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
576 reduc_stmt = vect_is_simple_reduction (loop, phi);
579 if (vect_print_dump_info (REPORT_DETAILS))
580 fprintf (vect_dump, "Detected reduction.");
/* Mark both the PHI and the reduction statement itself.  */
581 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
582 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
586 if (vect_print_dump_info (REPORT_DETAILS))
587 fprintf (vect_dump, "Unknown def-use cycle pattern.");
590 VEC_free (tree, heap, worklist);
/* NOTE(review): extract incomplete (embedded line numbers skip; the loop
   construct around the scan is elided).  Comments only were added.  */
595 /* Function vect_insert_into_interleaving_chain.
597 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
600 vect_insert_into_interleaving_chain (struct data_reference *dra,
601 struct data_reference *drb)
603 tree prev, next, next_init;
604 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
605 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
/* Walk DRB's chain (a singly-linked list through DR_GROUP_NEXT_DR,
   kept sorted by DR_INIT) to find the insertion point for DRA.  */
607 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
608 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
611 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
612 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
/* Found a node with a larger init: splice DRA in before it.  */
615 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
616 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
620 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
623 /* We got to the end of the list. Insert here. */
624 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
625 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
/* NOTE(review): extract incomplete (embedded line numbers skip; braces,
   else-keywords and the loop headers of both scans are elided).  Comments
   only were added.  */
629 /* Function vect_update_interleaving_chain.
631 For two data-refs DRA and DRB that are a part of a chain interleaved data
632 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
634 There are four possible cases:
635 1. New stmts - both DRA and DRB are not a part of any chain:
638 2. DRB is a part of a chain and DRA is not:
639 no need to update FIRST_DR
640 no need to insert DRB
641 insert DRA according to init
642 3. DRA is a part of a chain and DRB is not:
643 if (init of FIRST_DR > init of DRB)
645 NEXT(FIRST_DR) = previous FIRST_DR
647 insert DRB according to its init
648 4. both DRA and DRB are in some interleaving chains:
649 choose the chain with the smallest init of FIRST_DR
650 insert the nodes of the second chain into the first one. */
653 vect_update_interleaving_chain (struct data_reference *drb,
654 struct data_reference *dra)
656 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
657 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
658 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
659 tree node, prev, next, node_init, first_stmt;
661 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
662 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
/* DRB (the smaller init) becomes the head of a new two-element chain.  */
664 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
665 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
666 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
670 /* 2. DRB is a part of a chain and DRA is not. */
671 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
673 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
674 /* Insert DRA into the chain of DRB. */
675 vect_insert_into_interleaving_chain (dra, drb);
679 /* 3. DRA is a part of a chain and DRB is not. */
680 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
682 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
683 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
687 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
689 /* DRB's init is smaller than the init of the stmt previously marked
690 as the first stmt of the interleaving chain of DRA. Therefore, we
691 update FIRST_STMT and put DRB in the head of the list. */
692 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
693 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
695 /* Update all the stmts in the list to point to the new FIRST_STMT. */
696 tmp = old_first_stmt;
699 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
700 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
705 /* Insert DRB in the list of DRA. */
706 vect_insert_into_interleaving_chain (drb, dra);
707 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
712 /* 4. both DRA and DRB are in some interleaving chains. */
713 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
714 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
/* Same chain already: nothing to merge (the early return is elided).  */
715 if (first_a == first_b)
717 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
718 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
/* Merge the chain whose head has the larger init into the other one.  */
720 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
722 /* Insert the nodes of DRA chain into the DRB chain.
723 After inserting a node, continue from this node of the DRB chain (don't
724 start from the beginning. */
725 node = DR_GROUP_FIRST_DR (stmtinfo_a);
726 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
727 first_stmt = first_b;
731 /* Insert the nodes of DRB chain into the DRA chain.
732 After inserting a node, continue from this node of the DRA chain (don't
733 start from the beginning. */
734 node = DR_GROUP_FIRST_DR (stmtinfo_b);
735 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
736 first_stmt = first_a;
/* Sorted-merge loop: find the slot for NODE after PREV, splice it in,
   then advance NODE along its original chain.  */
741 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
742 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
745 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
746 if (tree_int_cst_compare (next_init, node_init) > 0)
749 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
750 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
755 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
759 /* We got to the end of the list. Insert here. */
760 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
761 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
764 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
765 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
/* NOTE(review): extract incomplete (embedded line numbers skip; the
   declarations and return statements are elided).  Comments only added.  */
770 /* Function vect_equal_offsets.
772 Check if OFFSET1 and OFFSET2 are identical expressions. */
775 vect_equal_offsets (tree offset1, tree offset2)
/* Peel conversion nodes so structurally-equal offsets compare equal.  */
779 STRIP_NOPS (offset1);
780 STRIP_NOPS (offset2);
/* Pointer-identical trees are trivially equal.  */
782 if (offset1 == offset2)
/* Only matching binary expressions are compared further; everything else
   (different codes, non-binary nodes) is considered unequal.  */
785 if (TREE_CODE (offset1) != TREE_CODE (offset2)
786 || !BINARY_CLASS_P (offset1)
787 || !BINARY_CLASS_P (offset2))
/* Recurse on both operands of the binary expression.  */
790 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
791 TREE_OPERAND (offset2, 0));
792 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
793 TREE_OPERAND (offset2, 1));
795 return (res0 && res1);
/* NOTE(review): extract incomplete (embedded line numbers skip; early
   returns and some guards are elided).  Comments only were added.  */
799 /* Function vect_check_interleaving.
801 Check if DRA and DRB are a part of interleaving. In case they are, insert
802 DRA and DRB in an interleaving chain. */
805 vect_check_interleaving (struct data_reference *dra,
806 struct data_reference *drb)
808 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
810 /* Check that the data-refs have same first location (except init) and they
811 are both either store or load (not load and store). */
812 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
813 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
814 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
815 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
816 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
817 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
818 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
819 || DR_IS_READ (dra) != DR_IS_READ (drb))
823 1. data-refs are of the same type
824 2. their steps are equal
825 3. the step is greater than the difference between data-refs' inits */
826 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
827 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
/* tree_int_cst_compare != 0 means the steps differ.  */
829 if (type_size_a != type_size_b
830 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
833 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
834 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
835 step = TREE_INT_CST_LOW (DR_STEP (dra));
839 /* If init_a == init_b + the size of the type * k, we have an interleaving,
840 and DRB is accessed before DRA. */
841 diff_mod_size = (init_a - init_b) % type_size_a;
/* Offsets further apart than one step cannot interleave.  */
843 if ((init_a - init_b) > step)
846 if (diff_mod_size == 0)
/* DRB leads: link the pair into DRB's chain.  */
848 vect_update_interleaving_chain (drb, dra);
849 if (vect_print_dump_info (REPORT_DR_DETAILS))
851 fprintf (vect_dump, "Detected interleaving ");
852 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
853 fprintf (vect_dump, " and ");
854 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
861 /* If init_b == init_a + the size of the type * k, we have an
862 interleaving, and DRA is accessed before DRB. */
863 diff_mod_size = (init_b - init_a) % type_size_a;
865 if ((init_b - init_a) > step)
868 if (diff_mod_size == 0)
/* DRA leads: mirror case of the above.  */
870 vect_update_interleaving_chain (dra, drb);
871 if (vect_print_dump_info (REPORT_DR_DETAILS))
873 fprintf (vect_dump, "Detected interleaving ");
874 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
875 fprintf (vect_dump, " and ");
876 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* NOTE(review): extract incomplete (embedded line numbers skip; the return
   statements after each diagnostic are elided).  Comments only added.  */
884 /* Function vect_analyze_data_ref_dependence.
886 Return TRUE if there (might) exist a dependence between a memory-reference
887 DRA and a memory-reference DRB. */
890 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
891 loop_vec_info loop_vinfo)
894 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
895 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
896 struct data_reference *dra = DDR_A (ddr);
897 struct data_reference *drb = DDR_B (ddr);
898 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
899 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
900 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
901 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
902 lambda_vector dist_v;
903 unsigned int loop_depth;
/* Proven-independent refs are still checked for interleaving so that
   access groups get built.  */
905 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
907 /* Independent data accesses. */
908 vect_check_interleaving (dra, drb);
/* Read-after-read or self dependence is harmless.  */
912 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
/* chrec_dont_know: the dependence could not be analyzed — must assume
   a dependence exists and reject the loop.  */
915 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
917 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
920 "not vectorized: can't determine dependence between ");
921 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
922 fprintf (vect_dump, " and ");
923 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
928 if (DDR_NUM_DIST_VECTS (ddr) == 0)
930 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
932 fprintf (vect_dump, "not vectorized: bad dist vector for ");
933 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
934 fprintf (vect_dump, " and ");
935 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Examine the distance in this loop's dimension of each dist vector.  */
940 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
941 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
943 int dist = dist_v[loop_depth];
945 if (vect_print_dump_info (REPORT_DR_DETAILS))
946 fprintf (vect_dump, "dependence distance = %d.", dist);
948 /* Same loop iteration. */
949 if (dist % vectorization_factor == 0 && dra_size == drb_size)
951 /* Two references with distance zero have the same alignment. */
952 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
953 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
954 if (vect_print_dump_info (REPORT_ALIGNMENT))
955 fprintf (vect_dump, "accesses have the same alignment.");
956 if (vect_print_dump_info (REPORT_DR_DETAILS))
958 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
959 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
960 fprintf (vect_dump, " and ");
961 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
964 /* For interleaving, mark that there is a read-write dependency if
965 necessary. We check before that one of the data-refs is store. */
966 if (DR_IS_READ (dra))
967 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
970 if (DR_IS_READ (drb))
971 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
/* A distance of at least VF iterations never mixes into one vector
   iteration — safe for vectorization.  */
977 if (abs (dist) >= vectorization_factor)
979 /* Dependence distance does not create dependence, as far as vectorization
980 is concerned, in this case. */
981 if (vect_print_dump_info (REPORT_DR_DETAILS))
982 fprintf (vect_dump, "dependence distance >= VF.");
/* Any other distance is a possible vectorization-breaking dependence.  */
986 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
989 "not vectorized: possible dependence between data-refs ");
990 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
991 fprintf (vect_dump, " and ");
992 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* NOTE(review): extract incomplete (embedded line numbers skip; the body
   of the failing branch and the final return are elided).  */
1002 /* Function vect_analyze_data_ref_dependences.
1004 Examine all the data references in the loop, and make sure there do not
1005 exist any data dependences between them. */
1008 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1011 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1012 struct data_dependence_relation *ddr;
1014 if (vect_print_dump_info (REPORT_DETAILS))
1015 fprintf (vect_dump, "=== vect_analyze_dependences ===");
/* Delegate each dependence relation to vect_analyze_data_ref_dependence;
   that helper returns TRUE when a (possible) dependence exists.  */
1017 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1018 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
/* NOTE(review): extract incomplete (embedded line numbers skip; several
   condition halves, braces and returns are elided).  Comments only added.  */
1025 /* Function vect_compute_data_ref_alignment
1027 Compute the misalignment of the data reference DR.
1030 1. If during the misalignment computation it is found that the data reference
1031 cannot be vectorized then false is returned.
1032 2. DR_MISALIGNMENT (DR) is defined.
1034 FOR NOW: No analysis is actually performed. Misalignment is calculated
1035 only for trivial cases. TODO. */
1038 vect_compute_data_ref_alignment (struct data_reference *dr)
1040 tree stmt = DR_STMT (dr);
1041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1042 tree ref = DR_REF (dr);
1044 tree base, base_addr;
1047 tree aligned_to, alignment;
1049 if (vect_print_dump_info (REPORT_DETAILS))
1050 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1052 /* Initialize misalignment to unknown. */
1053 DR_MISALIGNMENT (dr) = -1;
1055 misalign = DR_OFFSET_MISALIGNMENT (dr);
1056 aligned_to = DR_ALIGNED_TO (dr);
1057 base_addr = DR_BASE_ADDRESS (dr);
1058 base = build_fold_indirect_ref (base_addr);
1059 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment, in bytes, of the vector type.  */
1060 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
/* Known alignment weaker than the vectype's requirement: misalignment
   stays unknown (-1).  */
1062 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1065 if (vect_print_dump_info (REPORT_DETAILS))
1067 fprintf (vect_dump, "Unknown alignment for access: ");
1068 print_generic_expr (vect_dump, base, TDF_SLIM);
/* Decide whether the base object itself is sufficiently aligned, either
   via its decl alignment or, for an SSA_NAME pointer, via the pointed-to
   type's alignment.  (The first half of this condition is elided.)  */
1074 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1076 || (TREE_CODE (base_addr) == SSA_NAME
1077 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1078 TREE_TYPE (base_addr)))),
1080 base_aligned = true;
1082 base_aligned = false;
1086 /* Do not change the alignment of global variables if
1087 flag_section_anchors is enabled. */
1088 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1089 || (TREE_STATIC (base) && flag_section_anchors))
1091 if (vect_print_dump_info (REPORT_DETAILS))
1093 fprintf (vect_dump, "can't force alignment of ref: ");
1094 print_generic_expr (vect_dump, ref, TDF_SLIM);
1099 /* Force the alignment of the decl.
1100 NOTE: This is the only change to the code we make during
1101 the analysis phase, before deciding to vectorize the loop. */
1102 if (vect_print_dump_info (REPORT_DETAILS))
1103 fprintf (vect_dump, "force alignment");
1104 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1105 DECL_USER_ALIGN (base) = 1;
1108 /* At this point we assume that the base is aligned. */
1109 gcc_assert (base_aligned
1110 || (TREE_CODE (base) == VAR_DECL
1111 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1113 /* Modulo alignment. */
1114 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
/* host_integerp (x, 1) checks for a non-negative host-representable
   integer; anything else is an unusable misalignment value.  */
1116 if (!host_integerp (misalign, 1))
1118 /* Negative or overflowed misalignment value. */
1119 if (vect_print_dump_info (REPORT_DETAILS))
1120 fprintf (vect_dump, "unexpected misalign value");
1124 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1126 if (vect_print_dump_info (REPORT_DETAILS))
1128 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1129 print_generic_expr (vect_dump, ref, TDF_SLIM);
/* NOTE(review): extract incomplete (embedded line numbers skip; the
   failing-branch body and final return are elided).  */
1136 /* Function vect_compute_data_refs_alignment
1138 Compute the misalignment of data references in the loop.
1139 Return FALSE if a data reference is found that cannot be vectorized. */
1142 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1144 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1145 struct data_reference *dr;
/* Compute misalignment for every data-ref; a single failure aborts.  */
1148 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1149 if (!vect_compute_data_ref_alignment (dr))
1156 /* Function vect_update_misalignment_for_peel
1158 DR - the data reference whose misalignment is to be adjusted.
1159 DR_PEEL - the data reference whose misalignment is being made
1160 zero in the vector loop by the peel.
1161 NPEEL - the number of iterations in the peel loop if the misalignment
1162 of DR_PEEL is known at compile time. */
1165 vect_update_misalignment_for_peel (struct data_reference *dr,
1166 struct data_reference *dr_peel, int npeel)
1169 VEC(dr_p,heap) *same_align_drs;
1170 struct data_reference *current_dr;
1171 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1172 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1173 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1174 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1176 /* For interleaved data accesses the step in the loop must be multiplied by
1177 the size of the interleaving group. */
1178 if (DR_GROUP_FIRST_DR (stmt_info))
1179 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1180 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1181 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1183 if (known_alignment_for_access_p (dr)
1184 && known_alignment_for_access_p (dr_peel)
1185 && (DR_MISALIGNMENT (dr) / dr_size ==
1186 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1188 DR_MISALIGNMENT (dr) = 0;
1192 /* It can be assumed that the data refs with the same alignment as dr_peel
1193 are aligned in the vector loop. */
1195 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1196 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1198 if (current_dr != dr)
1200 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1201 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1202 DR_MISALIGNMENT (dr) = 0;
1206 if (known_alignment_for_access_p (dr)
1207 && known_alignment_for_access_p (dr_peel))
1209 DR_MISALIGNMENT (dr) += npeel * dr_size;
1210 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1214 if (vect_print_dump_info (REPORT_DETAILS))
1215 fprintf (vect_dump, "Setting misalignment to -1.");
1216 DR_MISALIGNMENT (dr) = -1;
1220 /* Function vect_verify_datarefs_alignment
1222 Return TRUE if all data references in the loop can be
1223 handled with respect to alignment. */
1226 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1228 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1229 struct data_reference *dr;
1230 enum dr_alignment_support supportable_dr_alignment;
1233 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1235 tree stmt = DR_STMT (dr);
1236 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1238 /* For interleaving, only the alignment of the first access matters. */
1239 if (DR_GROUP_FIRST_DR (stmt_info)
1240 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1243 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1244 if (!supportable_dr_alignment)
1246 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1248 if (DR_IS_READ (dr))
1250 "not vectorized: unsupported unaligned load.");
1253 "not vectorized: unsupported unaligned store.");
1257 if (supportable_dr_alignment != dr_aligned
1258 && vect_print_dump_info (REPORT_ALIGNMENT))
1259 fprintf (vect_dump, "Vectorizing an unaligned access.");
1265 /* Function vect_enhance_data_refs_alignment
1267 This pass will use loop versioning and loop peeling in order to enhance
1268 the alignment of data references in the loop.
1270 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1271 original loop is to be vectorized; Any other loops that are created by
1272 the transformations performed in this pass - are not supposed to be
1273 vectorized. This restriction will be relaxed.
1275 This pass will require a cost model to guide it whether to apply peeling
1276 or versioning or a combination of the two. For example, the scheme that
1277 intel uses when given a loop with several memory accesses, is as follows:
1278 choose one memory access ('p') which alignment you want to force by doing
1279 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1280 other accesses are not necessarily aligned, or (2) use loop versioning to
1281 generate one loop in which all accesses are aligned, and another loop in
1282 which only 'p' is necessarily aligned.
1284 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1285 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1286 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1288 Devising a cost model is the most critical aspect of this work. It will
1289 guide us on which access to peel for, whether to use loop versioning, how
1290 many versions to create, etc. The cost model will probably consist of
1291 generic considerations as well as target specific considerations (on
1292 powerpc for example, misaligned stores are more painful than misaligned
1295 Here are the general steps involved in alignment enhancements:
1297 -- original loop, before alignment analysis:
1298 for (i=0; i<N; i++){
1299 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1300 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1303 -- After vect_compute_data_refs_alignment:
1304 for (i=0; i<N; i++){
1305 x = q[i]; # DR_MISALIGNMENT(q) = 3
1306 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1309 -- Possibility 1: we do loop versioning:
1311 for (i=0; i<N; i++){ # loop 1A
1312 x = q[i]; # DR_MISALIGNMENT(q) = 3
1313 p[i] = y; # DR_MISALIGNMENT(p) = 0
1317 for (i=0; i<N; i++){ # loop 1B
1318 x = q[i]; # DR_MISALIGNMENT(q) = 3
1319 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1323 -- Possibility 2: we do loop peeling:
1324 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1328 for (i = 3; i < N; i++){ # loop 2A
1329 x = q[i]; # DR_MISALIGNMENT(q) = 0
1330 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1333 -- Possibility 3: combination of loop peeling and versioning:
1334 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1339 for (i = 3; i<N; i++){ # loop 3A
1340 x = q[i]; # DR_MISALIGNMENT(q) = 0
1341 p[i] = y; # DR_MISALIGNMENT(p) = 0
1345 for (i = 3; i<N; i++){ # loop 3B
1346 x = q[i]; # DR_MISALIGNMENT(q) = 0
1347 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1351 These loops are later passed to loop_transform to be vectorized. The
1352 vectorizer will use the alignment information to guide the transformation
1353 (whether to generate regular loads/stores, or with special handling for
1357 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1359 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1360 enum dr_alignment_support supportable_dr_alignment;
1361 struct data_reference *dr0 = NULL;
1362 struct data_reference *dr;
1364 bool do_peeling = false;
1365 bool do_versioning = false;
1368 stmt_vec_info stmt_info;
1370 if (vect_print_dump_info (REPORT_DETAILS))
1371 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1373 /* While cost model enhancements are expected in the future, the high level
1374 view of the code at this time is as follows:
1376 A) If there is a misaligned write then see if peeling to align this write
1377 can make all data references satisfy vect_supportable_dr_alignment.
1378 If so, update data structures as needed and return true. Note that
1379 at this time vect_supportable_dr_alignment is known to return false
1380 for a a misaligned write.
1382 B) If peeling wasn't possible and there is a data reference with an
1383 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1384 then see if loop versioning checks can be used to make all data
1385 references satisfy vect_supportable_dr_alignment. If so, update
1386 data structures as needed and return true.
1388 C) If neither peeling nor versioning were successful then return false if
1389 any data reference does not satisfy vect_supportable_dr_alignment.
1391 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1393 Note, Possibility 3 above (which is peeling and versioning together) is not
1394 being done at this time. */
1396 /* (1) Peeling to force alignment. */
1398 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1400 + How many accesses will become aligned due to the peeling
1401 - How many accesses will become unaligned due to the peeling,
1402 and the cost of misaligned accesses.
1403 - The cost of peeling (the extra runtime checks, the increase
1406 The scheme we use FORNOW: peel to force the alignment of the first
1407 misaligned store in the loop.
1408 Rationale: misaligned stores are not yet supported.
1410 TODO: Use a cost model. */
1412 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1414 stmt = DR_STMT (dr);
1415 stmt_info = vinfo_for_stmt (stmt);
1417 /* For interleaving, only the alignment of the first access
1419 if (DR_GROUP_FIRST_DR (stmt_info)
1420 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1423 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1425 if (DR_GROUP_FIRST_DR (stmt_info))
1427 /* For interleaved access we peel only if number of iterations in
1428 the prolog loop ({VF - misalignment}), is a multiple of the
1429 number of the interleaved accesses. */
1430 int elem_size, mis_in_elements;
1431 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1433 /* FORNOW: handle only known alignment. */
1434 if (!known_alignment_for_access_p (dr))
1440 elem_size = UNITS_PER_SIMD_WORD / vf;
1441 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1443 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1455 /* Often peeling for alignment will require peeling for loop-bound, which in
1456 turn requires that we know how to adjust the loop ivs after the loop. */
1457 if (!vect_can_advance_ivs_p (loop_vinfo))
1465 if (known_alignment_for_access_p (dr0))
1467 /* Since it's known at compile time, compute the number of iterations
1468 in the peeled loop (the peeling factor) for use in updating
1469 DR_MISALIGNMENT values. The peeling factor is the vectorization
1470 factor minus the misalignment as an element count. */
1471 mis = DR_MISALIGNMENT (dr0);
1472 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1473 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1475 /* For interleaved data access every iteration accesses all the
1476 members of the group, therefore we divide the number of iterations
1477 by the group size. */
1478 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1479 if (DR_GROUP_FIRST_DR (stmt_info))
1480 npeel /= DR_GROUP_SIZE (stmt_info);
1482 if (vect_print_dump_info (REPORT_DETAILS))
1483 fprintf (vect_dump, "Try peeling by %d", npeel);
1486 /* Ensure that all data refs can be vectorized after the peel. */
1487 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1489 int save_misalignment;
1494 stmt = DR_STMT (dr);
1495 stmt_info = vinfo_for_stmt (stmt);
1496 /* For interleaving, only the alignment of the first access
1498 if (DR_GROUP_FIRST_DR (stmt_info)
1499 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1502 save_misalignment = DR_MISALIGNMENT (dr);
1503 vect_update_misalignment_for_peel (dr, dr0, npeel);
1504 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1505 DR_MISALIGNMENT (dr) = save_misalignment;
1507 if (!supportable_dr_alignment)
1516 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1517 If the misalignment of DR_i is identical to that of dr0 then set
1518 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1519 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1520 by the peeling factor times the element size of DR_i (MOD the
1521 vectorization factor times the size). Otherwise, the
1522 misalignment of DR_i must be set to unknown. */
1523 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1525 vect_update_misalignment_for_peel (dr, dr0, npeel);
1527 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1528 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1529 DR_MISALIGNMENT (dr0) = 0;
1530 if (vect_print_dump_info (REPORT_ALIGNMENT))
1531 fprintf (vect_dump, "Alignment of access forced using peeling.");
1533 if (vect_print_dump_info (REPORT_DETAILS))
1534 fprintf (vect_dump, "Peeling for alignment will be applied.");
1536 stat = vect_verify_datarefs_alignment (loop_vinfo);
1543 /* (2) Versioning to force alignment. */
1545 /* Try versioning if:
1546 1) flag_tree_vect_loop_version is TRUE
1547 2) optimize_size is FALSE
1548 3) there is at least one unsupported misaligned data ref with an unknown
1550 4) all misaligned data refs with a known misalignment are supported, and
1551 5) the number of runtime alignment checks is within reason. */
1553 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1557 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1559 stmt = DR_STMT (dr);
1560 stmt_info = vinfo_for_stmt (stmt);
1562 /* For interleaving, only the alignment of the first access
1564 if (aligned_access_p (dr)
1565 || (DR_GROUP_FIRST_DR (stmt_info)
1566 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1569 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1571 if (!supportable_dr_alignment)
1577 if (known_alignment_for_access_p (dr)
1578 || VEC_length (tree,
1579 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1580 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1582 do_versioning = false;
1586 stmt = DR_STMT (dr);
1587 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1588 gcc_assert (vectype);
1590 /* The rightmost bits of an aligned address must be zeros.
1591 Construct the mask needed for this test. For example,
1592 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1593 mask must be 15 = 0xf. */
1594 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1596 /* FORNOW: use the same mask to test all potentially unaligned
1597 references in the loop. The vectorizer currently supports
1598 a single vector size, see the reference to
1599 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1600 vectorization factor is computed. */
1601 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1602 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1603 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1604 VEC_safe_push (tree, heap,
1605 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1610 /* Versioning requires at least one misaligned data reference. */
1611 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1612 do_versioning = false;
1613 else if (!do_versioning)
1614 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1619 VEC(tree,heap) *may_misalign_stmts
1620 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1623 /* It can now be assumed that the data references in the statements
1624 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1625 of the loop being vectorized. */
1626 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1628 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1629 dr = STMT_VINFO_DATA_REF (stmt_info);
1630 DR_MISALIGNMENT (dr) = 0;
1631 if (vect_print_dump_info (REPORT_ALIGNMENT))
1632 fprintf (vect_dump, "Alignment of access forced using versioning.");
1635 if (vect_print_dump_info (REPORT_DETAILS))
1636 fprintf (vect_dump, "Versioning for alignment will be applied.");
1638 /* Peeling and versioning can't be done together at this time. */
1639 gcc_assert (! (do_peeling && do_versioning));
1641 stat = vect_verify_datarefs_alignment (loop_vinfo);
1646 /* This point is reached if neither peeling nor versioning is being done. */
1647 gcc_assert (! (do_peeling || do_versioning));
1649 stat = vect_verify_datarefs_alignment (loop_vinfo);
1654 /* Function vect_analyze_data_refs_alignment
1656 Analyze the alignment of the data-references in the loop.
1657 Return FALSE if a data reference is found that cannot be vectorized. */
1660 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1662 if (vect_print_dump_info (REPORT_DETAILS))
1663 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1665 if (!vect_compute_data_refs_alignment (loop_vinfo))
1667 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1669 "not vectorized: can't calculate alignment for data ref.");
1677 /* Function vect_analyze_data_ref_access.
1679 Analyze the access pattern of the data-reference DR. For now, a data access
1680 has to be consecutive to be considered vectorizable. */
1683 vect_analyze_data_ref_access (struct data_reference *dr)
1685 tree step = DR_STEP (dr);
1686 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1687 tree scalar_type = TREE_TYPE (DR_REF (dr));
1688 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1689 tree stmt = DR_STMT (dr);
1690 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1691 interleaving group (including gaps). */
1692 HOST_WIDE_INT stride = dr_step / type_size;
1696 if (vect_print_dump_info (REPORT_DETAILS))
1697 fprintf (vect_dump, "bad data-ref access");
1702 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1704 /* Mark that it is not interleaving. */
1705 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1709 /* Not consecutive access is possible only if it is a part of interleaving. */
1710 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1712 /* Check if it this DR is a part of interleaving, and is a single
1713 element of the group that is accessed in the loop. */
1715 /* Gaps are supported only for loads. STEP must be a multiple of the type
1716 size. The size of the group must be a power of 2. */
1718 && (dr_step % type_size) == 0
1720 && exact_log2 (stride) != -1)
1722 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1723 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1724 if (vect_print_dump_info (REPORT_DR_DETAILS))
1726 fprintf (vect_dump, "Detected single element interleaving %d ",
1727 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1728 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1729 fprintf (vect_dump, " step ");
1730 print_generic_expr (vect_dump, step, TDF_SLIM);
1734 if (vect_print_dump_info (REPORT_DETAILS))
1735 fprintf (vect_dump, "not consecutive access");
1739 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1741 /* First stmt in the interleaving chain. Check the chain. */
1742 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1743 struct data_reference *data_ref = dr;
1744 unsigned int count = 1;
1746 tree prev_init = DR_INIT (data_ref);
1748 HOST_WIDE_INT diff, count_in_bytes;
1752 /* Skip same data-refs. In case that two or more stmts share data-ref
1753 (supported only for loads), we vectorize only the first stmt, and
1754 the rest get their vectorized loads from the the first one. */
1755 if (!tree_int_cst_compare (DR_INIT (data_ref),
1756 DR_INIT (STMT_VINFO_DATA_REF (
1757 vinfo_for_stmt (next)))))
1759 if (!DR_IS_READ (data_ref))
1761 if (vect_print_dump_info (REPORT_DETAILS))
1762 fprintf (vect_dump, "Two store stmts share the same dr.");
1766 /* Check that there is no load-store dependencies for this loads
1767 to prevent a case of load-store-load to the same location. */
1768 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1769 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1771 if (vect_print_dump_info (REPORT_DETAILS))
1773 "READ_WRITE dependence in interleaving.");
1777 /* For load use the same data-ref load. */
1778 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1781 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1786 /* Check that all the accesses have the same STEP. */
1787 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1788 if (tree_int_cst_compare (step, next_step))
1790 if (vect_print_dump_info (REPORT_DETAILS))
1791 fprintf (vect_dump, "not consecutive access in interleaving");
1795 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1796 /* Check that the distance between two accesses is equal to the type
1797 size. Otherwise, we have gaps. */
1798 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1799 - TREE_INT_CST_LOW (prev_init)) / type_size;
1800 if (!DR_IS_READ (data_ref) && diff != 1)
1802 if (vect_print_dump_info (REPORT_DETAILS))
1803 fprintf (vect_dump, "interleaved store with gaps");
1806 /* Store the gap from the previous member of the group. If there is no
1807 gap in the access, DR_GROUP_GAP is always 1. */
1808 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1810 prev_init = DR_INIT (data_ref);
1811 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1812 /* Count the number of data-refs in the chain. */
1816 /* COUNT is the number of accesses found, we multiply it by the size of
1817 the type to get COUNT_IN_BYTES. */
1818 count_in_bytes = type_size * count;
1820 /* Check that the size of the interleaving is not greater than STEP. */
1821 if (dr_step < count_in_bytes)
1823 if (vect_print_dump_info (REPORT_DETAILS))
1825 fprintf (vect_dump, "interleaving size is greater than step for ");
1826 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1831 /* Check that the size of the interleaving is equal to STEP for stores,
1832 i.e., that there are no gaps. */
1833 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1835 if (vect_print_dump_info (REPORT_DETAILS))
1836 fprintf (vect_dump, "interleaved store with gaps");
1840 /* Check that STEP is a multiple of type size. */
1841 if ((dr_step % type_size) != 0)
1843 if (vect_print_dump_info (REPORT_DETAILS))
1845 fprintf (vect_dump, "step is not a multiple of type size: step ");
1846 print_generic_expr (vect_dump, step, TDF_SLIM);
1847 fprintf (vect_dump, " size ");
1848 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1854 /* FORNOW: we handle only interleaving that is a power of 2. */
1855 if (exact_log2 (stride) == -1)
1857 if (vect_print_dump_info (REPORT_DETAILS))
1858 fprintf (vect_dump, "interleaving is not a power of 2");
1861 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1867 /* Function vect_analyze_data_ref_accesses.
1869 Analyze the access pattern of all the data references in the loop.
1871 FORNOW: the only access pattern that is considered vectorizable is a
1872 simple step 1 (consecutive) access.
1874 FORNOW: handle only arrays and pointer accesses. */
1877 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1880 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1881 struct data_reference *dr;
1883 if (vect_print_dump_info (REPORT_DETAILS))
1884 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1886 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1887 if (!vect_analyze_data_ref_access (dr))
1889 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1890 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1898 /* Function vect_analyze_data_refs.
1900 Find all the data references in the loop.
1902 The general structure of the analysis of data refs in the vectorizer is as
1904 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1905 find and analyze all data-refs in the loop and their dependences.
1906 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1907 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1908 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1913 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1915 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1917 VEC (data_reference_p, heap) *datarefs;
1918 struct data_reference *dr;
1921 if (vect_print_dump_info (REPORT_DETAILS))
1922 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1924 compute_data_dependences_for_loop (loop, true,
1925 &LOOP_VINFO_DATAREFS (loop_vinfo),
1926 &LOOP_VINFO_DDRS (loop_vinfo));
1928 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1929 from stmt_vec_info struct to DR and vectype. */
1930 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1932 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1935 stmt_vec_info stmt_info;
1937 if (!dr || !DR_REF (dr))
1939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1940 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1944 /* Update DR field in stmt_vec_info struct. */
1945 stmt = DR_STMT (dr);
1946 stmt_info = vinfo_for_stmt (stmt);
1948 if (STMT_VINFO_DATA_REF (stmt_info))
1950 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1953 "not vectorized: more than one data ref in stmt: ");
1954 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1958 STMT_VINFO_DATA_REF (stmt_info) = dr;
1960 /* Check that analysis of the data-ref succeeded. */
1961 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1964 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1966 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1967 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1971 if (!DR_MEMTAG (dr))
1973 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1975 fprintf (vect_dump, "not vectorized: no memory tag for ");
1976 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1981 /* Set vectype for STMT. */
1982 scalar_type = TREE_TYPE (DR_REF (dr));
1983 STMT_VINFO_VECTYPE (stmt_info) =
1984 get_vectype_for_scalar_type (scalar_type);
1985 if (!STMT_VINFO_VECTYPE (stmt_info))
1987 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1990 "not vectorized: no vectype for stmt: ");
1991 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1992 fprintf (vect_dump, " scalar_type: ");
1993 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2003 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2005 /* Function vect_mark_relevant.
2007 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2010 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2011 enum vect_relevant relevant, bool live_p)
2013 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2014 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2015 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2017 if (vect_print_dump_info (REPORT_DETAILS))
2018 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2020 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2024 /* This is the last stmt in a sequence that was detected as a
2025 pattern that can potentially be vectorized. Don't mark the stmt
2026 as relevant/live because it's not going to vectorized.
2027 Instead mark the pattern-stmt that replaces it. */
2028 if (vect_print_dump_info (REPORT_DETAILS))
2029 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2030 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2031 stmt_info = vinfo_for_stmt (pattern_stmt);
2032 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2033 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2034 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2035 stmt = pattern_stmt;
2038 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2039 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2040 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2042 if (TREE_CODE (stmt) == PHI_NODE)
2043 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2044 or live will fail vectorization later on. */
2047 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2048 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2050 if (vect_print_dump_info (REPORT_DETAILS))
2051 fprintf (vect_dump, "already marked relevant/live.");
2055 VEC_safe_push (tree, heap, *worklist, stmt);
2059 /* Function vect_stmt_relevant_p.
2061 Return true if STMT in loop that is represented by LOOP_VINFO is
2062 "relevant for vectorization".
2064 A stmt is considered "relevant for vectorization" if:
2065 - it has uses outside the loop.
2066 - it has vdefs (it alters memory).
2067 - control stmts in the loop (except for the exit condition).
2069 CHECKME: what other side effects would the vectorizer allow? */
2072 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2073 enum vect_relevant *relevant, bool *live_p)
2075 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2076 ssa_op_iter op_iter;
2077 imm_use_iterator imm_iter;
2078 use_operand_p use_p;
2079 def_operand_p def_p;
2081 *relevant = vect_unused_in_loop;
2084 /* cond stmt other than loop exit cond. */
2085 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2086 *relevant = vect_used_in_loop;
2088 /* changing memory. */
2089 if (TREE_CODE (stmt) != PHI_NODE)
2090 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2092 if (vect_print_dump_info (REPORT_DETAILS))
2093 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2094 *relevant = vect_used_in_loop;
2097 /* uses outside the loop. */
2098 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2100 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2102 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2103 if (!flow_bb_inside_loop_p (loop, bb))
2105 if (vect_print_dump_info (REPORT_DETAILS))
2106 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2108 /* We expect all such uses to be in the loop exit phis
2109 (because of loop closed form) */
2110 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2111 gcc_assert (bb == single_exit (loop)->dest);
2118 return (*live_p || *relevant);
2122 /* Function vect_mark_stmts_to_be_vectorized.
2124 Not all stmts in the loop need to be vectorized. For example:
2133 Stmt 1 and 3 do not need to be vectorized, because loop control and
2134 addressing of vectorized data-refs are handled differently.
2136 This pass detects such stmts. */
2139 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2141 VEC(tree,heap) *worklist;
2142 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2143 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2144 unsigned int nbbs = loop->num_nodes;
2145 block_stmt_iterator si;
2150 stmt_vec_info stmt_vinfo;
2154 enum vect_relevant relevant;
2156 enum vect_def_type dt;
2158 if (vect_print_dump_info (REPORT_DETAILS))
2159 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2161 worklist = VEC_alloc (tree, heap, 64);
2163 /* 1. Init worklist. */
2166 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2168 if (vect_print_dump_info (REPORT_DETAILS))
2170 fprintf (vect_dump, "init: phi relevant? ");
2171 print_generic_expr (vect_dump, phi, TDF_SLIM);
2174 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2175 vect_mark_relevant (&worklist, phi, relevant, live_p);
2178 for (i = 0; i < nbbs; i++)
2181 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2183 stmt = bsi_stmt (si);
2185 if (vect_print_dump_info (REPORT_DETAILS))
2187 fprintf (vect_dump, "init: stmt relevant? ");
2188 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2191 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2192 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2197 /* 2. Process_worklist */
2199 while (VEC_length (tree, worklist) > 0)
2201 stmt = VEC_pop (tree, worklist);
2203 if (vect_print_dump_info (REPORT_DETAILS))
2205 fprintf (vect_dump, "worklist: examine stmt: ");
2206 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2209 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2210 in the loop, mark the stmt that defines it (DEF_STMT) as
2211 relevant/irrelevant and live/dead according to the liveness and
2212 relevance properties of STMT.
2215 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2217 ann = stmt_ann (stmt);
2218 stmt_vinfo = vinfo_for_stmt (stmt);
2220 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2221 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2223 /* Generally, the liveness and relevance properties of STMT are
2224 propagated to the DEF_STMTs of its USEs:
2225 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2226 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2231 If USE is used only for address computations (e.g. array indexing),
2232 which does not need to be directly vectorized, then the
2233 liveness/relevance of the respective DEF_STMT is left unchanged.
2236 If STMT has been identified as defining a reduction variable, then
2239 The last use of STMT is the reduction-variable, which is defined
2240 by a loop-header-phi. We don't want to mark the phi as live or
2241 relevant (because it does not need to be vectorized, it is handled
2242 as part of the vectorization of the reduction), so in this case we
2243 skip the call to vect_mark_relevant.
2245 The rest of the uses of STMT are defined in the loop body. For
2246 the def_stmt of these uses we want to set liveness/relevance
2248 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2249 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2250 because even though STMT is classified as live (since it defines a
2251 value that is used across loop iterations) and irrelevant (since it
2252 is not used inside the loop), it will be vectorized, and therefore
2253 the corresponding DEF_STMTs need to be marked as relevant.
2254 We distinguish between two kinds of relevant stmts - those that are
2255 used by a reduction computation, and those that are (also) used by
2256 a regular computation. This allows us later on to identify stmts
2257 that are used solely by a reduction, and therefore the order of
2258 the results that they produce does not have to be kept.
2262 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2264 gcc_assert (relevant == vect_unused_in_loop && live_p);
2265 relevant = vect_used_by_reduction;
2270 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2272 if (vect_print_dump_info (REPORT_DETAILS))
2274 fprintf (vect_dump, "worklist: examine use %d: ", i++);
2275 print_generic_expr (vect_dump, use, TDF_SLIM);
2278 /* case 1: we are only interested in uses that need to be vectorized.
2279 Uses that are used for address computation are not considered
2282 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2285 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2287 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2288 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2289 VEC_free (tree, heap, worklist);
2293 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2296 bb = bb_for_stmt (def_stmt);
2297 if (!flow_bb_inside_loop_p (loop, bb))
2300 /* case 2.1: the reduction-use does not mark the defining-phi
2302 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2303 && TREE_CODE (def_stmt) == PHI_NODE)
2306 if (dt == vect_induction_def && TREE_CODE (def_stmt) == PHI_NODE)
2309 vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
2311 } /* while worklist */
2313 VEC_free (tree, heap, worklist);
2318 /* Function vect_can_advance_ivs_p
2320 In case the number of iterations that LOOP iterates is unknown at compile
2321 time, an epilog loop will be generated, and the loop induction variables
2322 (IVs) will be "advanced" to the value they are supposed to take just before
2323 the epilog loop. Here we check that the access function of the loop IVs
2324 and the expression that represents the loop bound are simple enough.
2325 These restrictions will be relaxed in the future. */
2328 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2330 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2331 basic_block bb = loop->header;
2334 /* Analyze phi functions of the loop header. */
2336 if (vect_print_dump_info (REPORT_DETAILS))
2337 fprintf (vect_dump, "vect_can_advance_ivs_p:");
/* Every candidate IV is defined by a phi in the loop header; walk them
   one by one, rejecting the loop if any IV is too complex to advance.  */
2339 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2341 tree access_fn = NULL;
2342 tree evolution_part;
2344 if (vect_print_dump_info (REPORT_DETAILS))
2346 fprintf (vect_dump, "Analyze phi: ");
2347 print_generic_expr (vect_dump, phi, TDF_SLIM);
2350 /* Skip virtual phi's. The data dependences that are associated with
2351 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2353 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2355 if (vect_print_dump_info (REPORT_DETAILS))
2356 fprintf (vect_dump, "virtual phi. skip.");
/* Reduction phis were classified earlier (vect_analyze_scalar_cycles);
   they are handled by the reduction code, not advanced as IVs here.  */
2360 /* Skip reduction phis. */
2362 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "reduc phi. skip.");
/* Compute the scalar evolution of the phi result in LOOP, with symbolic
   parameters instantiated so the access function is self-contained.  */
2369 /* Analyze the evolution function. */
2371 access_fn = instantiate_parameters
2372 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2376 if (vect_print_dump_info (REPORT_DETAILS))
2377 fprintf (vect_dump, "No Access function.");
2381 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "Access function of PHI: ");
2384 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* Extract the step of the IV in this loop; NULL_TREE means the SCEV
   machinery could not determine an evolution for it.  */
2387 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2389 if (evolution_part == NULL_TREE)
2391 if (vect_print_dump_info (REPORT_DETAILS))
2392 fprintf (vect_dump, "No evolution.");
2396 /* FORNOW: We do not transform initial conditions of IVs
2397 which evolution functions are a polynomial of degree >= 2. */
/* If the evolution part is itself a chrec, the step varies across
   iterations (polynomial degree >= 2), which is rejected for now.  */
2399 if (tree_is_chrec (evolution_part))
2407 /* Function vect_get_loop_niters.
2409 Determine how many iterations the loop is executed.
2410 If an expression that represents the number of iterations
2411 can be constructed, place it in NUMBER_OF_ITERATIONS.
2412 Return the loop exit condition. */
2415 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2419 if (vect_print_dump_info (REPORT_DETAILS))
2420 fprintf (vect_dump, "=== get_loop_niters ===");
/* Ask the SCEV machinery how many times the exit condition executes.
   The result may be NULL_TREE or chrec_dont_know if uncomputable.  */
2422 niters = number_of_exit_cond_executions (loop);
/* Only store a result when the iteration count is determinable;
   otherwise *NUMBER_OF_ITERATIONS is left untouched for the caller
   to detect (caller initializes it to NULL).  */
2424 if (niters != NULL_TREE
2425 && niters != chrec_dont_know)
2427 *number_of_iterations = niters;
2429 if (vect_print_dump_info (REPORT_DETAILS))
2431 fprintf (vect_dump, "==> get_loop_niters:" );
2432 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
/* The exit condition is returned regardless of whether the iteration
   count was computed; it may be NULL if no simple condition exists.  */
2436 return get_loop_exit_condition (loop);
2440 /* Function vect_analyze_loop_form.
2442 Verify the following restrictions (some may be relaxed in the future):
2443 - it's an inner-most loop
2444 - number of BBs = 2 (which are the loop header and the latch)
2445 - the loop has a pre-header
2446 - the loop has a single entry and exit
2447 - the loop exit condition is simple enough, and the number of iterations
2448 can be analyzed (a countable loop). */
2450 static loop_vec_info
2451 vect_analyze_loop_form (struct loop *loop)
2453 loop_vec_info loop_vinfo;
2455 tree number_of_iterations = NULL;
2457 if (vect_print_dump_info (REPORT_DETAILS))
2458 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
/* Reject non-innermost loops; outer loops are reported separately.  */
2462 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2463 fprintf (vect_dump, "not vectorized: nested loop.");
/* The loop must have a single exit, exactly two basic blocks (header
   and latch), and exactly two incoming edges to the header (preheader
   entry plus latch back-edge).  */
2467 if (!single_exit (loop)
2468 || loop->num_nodes != 2
2469 || EDGE_COUNT (loop->header->preds) != 2)
2471 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
/* Re-test each condition only to emit the most specific message.  */
2473 if (!single_exit (loop))
2474 fprintf (vect_dump, "not vectorized: multiple exits.");
2475 else if (loop->num_nodes != 2)
2476 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2477 else if (EDGE_COUNT (loop->header->preds) != 2)
2478 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2484 /* We assume that the loop exit condition is at the end of the loop. i.e,
2485 that the loop is represented as a do-while (with a proper if-guard
2486 before the loop if needed), where the loop header contains all the
2487 executable statements, and the latch is empty. */
2488 if (!empty_block_p (loop->latch)
2489 || phi_nodes (loop->latch))
2491 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2492 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2496 /* Make sure there exists a single-predecessor exit bb: */
2497 if (!single_pred_p (single_exit (loop)->dest))
2499 edge e = single_exit (loop);
/* A normal exit edge can be split to create the needed block;
   an abnormal edge cannot, so the loop is rejected.  */
2500 if (!(e->flags & EDGE_ABNORMAL))
2502 split_loop_exit_edge (e);
2503 if (vect_print_dump_info (REPORT_DETAILS))
2504 fprintf (vect_dump, "split exit edge.");
2508 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2509 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2514 if (empty_block_p (loop->header))
2516 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2517 fprintf (vect_dump, "not vectorized: empty loop.");
/* Try to determine the loop bound and the exit condition; both are
   needed for a countable loop.  */
2521 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2524 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2525 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2529 if (!number_of_iterations)
2531 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2533 "not vectorized: number of iterations cannot be computed.");
2537 if (chrec_contains_undetermined (number_of_iterations))
2539 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2540 fprintf (vect_dump, "Infinite number of iterations.");
/* All form checks passed: create the loop_vec_info and record the
   iteration count and exit condition for the later analysis phases.  */
2544 loop_vinfo = new_loop_vec_info (loop);
2545 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2547 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2549 if (vect_print_dump_info (REPORT_DETAILS))
2551 fprintf (vect_dump, "Symbolic number of iterations is ");
2552 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
/* A known iteration count of zero means there is nothing to vectorize.  */
2556 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2558 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2559 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2563 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2569 /* Function vect_analyze_loop.
2571 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2572 for it. The different analyses will record information in the
2573 loop_vec_info struct. */
2575 vect_analyze_loop (struct loop *loop)
2578 loop_vec_info loop_vinfo;
2580 if (vect_print_dump_info (REPORT_DETAILS))
2581 fprintf (vect_dump, "===== analyze_loop_nest =====");
2583 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2585 loop_vinfo = vect_analyze_loop_form (loop);
2588 if (vect_print_dump_info (REPORT_DETAILS))
2589 fprintf (vect_dump, "bad loop form.");
2593 /* Find all data references in the loop (which correspond to vdefs/vuses)
2594 and analyze their evolution in the loop.
2596 FORNOW: Handle only simple, array references, which
2597 alignment can be forced, and aligned pointer-references. */
2599 ok = vect_analyze_data_refs (loop_vinfo);
2602 if (vect_print_dump_info (REPORT_DETAILS))
2603 fprintf (vect_dump, "bad data references.");
2604 destroy_loop_vec_info (loop_vinfo);
2608 /* Classify all cross-iteration scalar data-flow cycles.
2609 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2611 vect_analyze_scalar_cycles (loop_vinfo);
2613 vect_pattern_recog (loop_vinfo);
2615 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2617 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2620 if (vect_print_dump_info (REPORT_DETAILS))
2621 fprintf (vect_dump, "unexpected pattern.");
2622 destroy_loop_vec_info (loop_vinfo);
2626 /* Analyze the alignment of the data-refs in the loop.
2627 Fail if a data reference is found that cannot be vectorized. */
2629 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2632 if (vect_print_dump_info (REPORT_DETAILS))
2633 fprintf (vect_dump, "bad data alignment.");
2634 destroy_loop_vec_info (loop_vinfo);
2638 ok = vect_determine_vectorization_factor (loop_vinfo);
2641 if (vect_print_dump_info (REPORT_DETAILS))
2642 fprintf (vect_dump, "can't determine vectorization factor.");
2643 destroy_loop_vec_info (loop_vinfo);
2647 /* Analyze data dependences between the data-refs in the loop.
2648 FORNOW: fail at the first data dependence that we encounter. */
2650 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2653 if (vect_print_dump_info (REPORT_DETAILS))
2654 fprintf (vect_dump, "bad data dependence.");
2655 destroy_loop_vec_info (loop_vinfo);
2659 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2660 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2662 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2665 if (vect_print_dump_info (REPORT_DETAILS))
2666 fprintf (vect_dump, "bad data access.");
2667 destroy_loop_vec_info (loop_vinfo);
2671 /* This pass will decide on using loop versioning and/or loop peeling in
2672 order to enhance the alignment of data references in the loop. */
2674 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2677 if (vect_print_dump_info (REPORT_DETAILS))
2678 fprintf (vect_dump, "bad data alignment.");
2679 destroy_loop_vec_info (loop_vinfo);
2683 /* Scan all the operations in the loop and make sure they are
2686 ok = vect_analyze_operations (loop_vinfo);
2689 if (vect_print_dump_info (REPORT_DETAILS))
2690 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2691 destroy_loop_vec_info (loop_vinfo);
2695 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;