1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
104 stmt_vec_info stmt_info;
107 if (vect_print_dump_info (REPORT_DETAILS))
108 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
110 for (i = 0; i < nbbs; i++)
112 basic_block bb = bbs[i];
114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
116 stmt_info = vinfo_for_stmt (phi);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining phi: ");
120 print_generic_expr (vect_dump, phi, TDF_SLIM);
123 gcc_assert (stmt_info);
125 if (STMT_VINFO_RELEVANT_P (stmt_info))
127 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
128 scalar_type = TREE_TYPE (PHI_RESULT (phi));
130 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "get vectype for scalar type: ");
133 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
136 vectype = get_vectype_for_scalar_type (scalar_type);
139 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 "not vectorized: unsupported data-type ");
143 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
147 STMT_VINFO_VECTYPE (stmt_info) = vectype;
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "vectype: ");
152 print_generic_expr (vect_dump, vectype, TDF_SLIM);
155 nunits = TYPE_VECTOR_SUBPARTS (vectype);
156 if (vect_print_dump_info (REPORT_DETAILS))
157 fprintf (vect_dump, "nunits = %d", nunits);
159 if (!vectorization_factor
160 || (nunits > vectorization_factor))
161 vectorization_factor = nunits;
165 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
167 tree stmt = bsi_stmt (si);
168 stmt_info = vinfo_for_stmt (stmt);
170 if (vect_print_dump_info (REPORT_DETAILS))
172 fprintf (vect_dump, "==> examining statement: ");
173 print_generic_expr (vect_dump, stmt, TDF_SLIM);
176 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
179 gcc_assert (stmt_info);
181 /* skip stmts which do not need to be vectorized. */
182 if (!STMT_VINFO_RELEVANT_P (stmt_info)
183 && !STMT_VINFO_LIVE_P (stmt_info))
185 if (vect_print_dump_info (REPORT_DETAILS))
186 fprintf (vect_dump, "skip.");
190 if (!GIMPLE_STMT_P (stmt)
191 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
193 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
195 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
196 print_generic_expr (vect_dump, stmt, TDF_SLIM);
201 if (STMT_VINFO_VECTYPE (stmt_info))
203 /* The only case when a vectype had been already set is for stmts
204 that contain a dataref, or for "pattern-stmts" (stmts generated
205 by the vectorizer to represent/replace a certain idiom). */
206 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
207 || is_pattern_stmt_p (stmt_info));
208 vectype = STMT_VINFO_VECTYPE (stmt_info);
212 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
213 && !is_pattern_stmt_p (stmt_info));
215 /* We set the vectype according to the type of the result (lhs).
216 For stmts whose result-type is different than the type of the
217 arguments (e.g. demotion, promotion), vectype will be reset
218 appropriately (later). Note that we have to visit the smallest
219 datatype in this function, because that determines the VF.
220 If the smallest datatype in the loop is present only as the
221 rhs of a promotion operation - we'd miss it here.
222 However, in such a case, that a variable of this datatype
223 does not appear in the lhs anywhere in the loop, it shouldn't
224 affect the vectorization factor. */
225 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
227 if (vect_print_dump_info (REPORT_DETAILS))
229 fprintf (vect_dump, "get vectype for scalar type: ");
230 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
233 vectype = get_vectype_for_scalar_type (scalar_type);
236 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
239 "not vectorized: unsupported data-type ");
240 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
244 STMT_VINFO_VECTYPE (stmt_info) = vectype;
247 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vectype: ");
250 print_generic_expr (vect_dump, vectype, TDF_SLIM);
253 nunits = TYPE_VECTOR_SUBPARTS (vectype);
254 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "nunits = %d", nunits);
257 if (!vectorization_factor
258 || (nunits > vectorization_factor))
259 vectorization_factor = nunits;
264 /* TODO: Analyze cost. Decide if worth while to vectorize. */
265 if (vect_print_dump_info (REPORT_DETAILS))
266 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
267 if (vectorization_factor <= 1)
269 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
270 fprintf (vect_dump, "not vectorized: unsupported data-type");
273 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
279 /* Function vect_analyze_operations.
281 Scan the loop stmts and make sure they are all vectorizable. */
284 vect_analyze_operations (loop_vec_info loop_vinfo)
286 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
287 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
288 int nbbs = loop->num_nodes;
289 block_stmt_iterator si;
290 unsigned int vectorization_factor = 0;
294 stmt_vec_info stmt_info;
295 bool need_to_vectorize = false;
297 if (vect_print_dump_info (REPORT_DETAILS))
298 fprintf (vect_dump, "=== vect_analyze_operations ===");
300 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
301 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
303 for (i = 0; i < nbbs; i++)
305 basic_block bb = bbs[i];
307 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
311 stmt_info = vinfo_for_stmt (phi);
312 if (vect_print_dump_info (REPORT_DETAILS))
314 fprintf (vect_dump, "examining phi: ");
315 print_generic_expr (vect_dump, phi, TDF_SLIM);
318 gcc_assert (stmt_info);
320 if (STMT_VINFO_LIVE_P (stmt_info))
322 /* FORNOW: not yet supported. */
323 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
324 fprintf (vect_dump, "not vectorized: value used after loop.");
328 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
329 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
331 /* A scalar-dependence cycle that we don't support. */
332 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
333 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
337 if (STMT_VINFO_RELEVANT_P (stmt_info))
339 need_to_vectorize = true;
340 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
341 ok = vectorizable_induction (phi, NULL, NULL);
346 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
349 "not vectorized: relevant phi not supported: ");
350 print_generic_expr (vect_dump, phi, TDF_SLIM);
356 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
358 tree stmt = bsi_stmt (si);
359 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
360 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
362 if (vect_print_dump_info (REPORT_DETAILS))
364 fprintf (vect_dump, "==> examining statement: ");
365 print_generic_expr (vect_dump, stmt, TDF_SLIM);
368 gcc_assert (stmt_info);
370 /* skip stmts which do not need to be vectorized.
371 this is expected to include:
372 - the COND_EXPR which is the loop exit condition
373 - any LABEL_EXPRs in the loop
374 - computations that are used only for array indexing or loop
377 if (!STMT_VINFO_RELEVANT_P (stmt_info)
378 && !STMT_VINFO_LIVE_P (stmt_info))
380 if (vect_print_dump_info (REPORT_DETAILS))
381 fprintf (vect_dump, "irrelevant.");
385 switch (STMT_VINFO_DEF_TYPE (stmt_info))
390 case vect_reduction_def:
391 gcc_assert (relevance == vect_unused_in_loop);
394 case vect_induction_def:
395 case vect_constant_def:
396 case vect_invariant_def:
397 case vect_unknown_def_type:
402 if (STMT_VINFO_RELEVANT_P (stmt_info))
404 gcc_assert (GIMPLE_STMT_P (stmt)
405 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
406 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
407 need_to_vectorize = true;
410 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
411 || vectorizable_type_demotion (stmt, NULL, NULL)
412 || vectorizable_conversion (stmt, NULL, NULL)
413 || vectorizable_operation (stmt, NULL, NULL)
414 || vectorizable_assignment (stmt, NULL, NULL)
415 || vectorizable_load (stmt, NULL, NULL)
416 || vectorizable_call (stmt, NULL, NULL)
417 || vectorizable_store (stmt, NULL, NULL)
418 || vectorizable_condition (stmt, NULL, NULL)
419 || vectorizable_reduction (stmt, NULL, NULL));
421 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
422 need extra handling, except for vectorizable reductions. */
423 if (STMT_VINFO_LIVE_P (stmt_info)
424 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
425 ok |= vectorizable_live_operation (stmt, NULL, NULL);
429 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
431 fprintf (vect_dump, "not vectorized: stmt not supported: ");
432 print_generic_expr (vect_dump, stmt, TDF_SLIM);
439 /* TODO: Analyze cost. Decide if worth while to vectorize. */
441 /* All operations in the loop are either irrelevant (deal with loop
442 control, or dead), or only used outside the loop and can be moved
443 out of the loop (e.g. invariants, inductions). The loop can be
444 optimized away by scalar optimizations. We're better off not
445 touching this loop. */
446 if (!need_to_vectorize)
448 if (vect_print_dump_info (REPORT_DETAILS))
450 "All the computation can be taken out of the loop.");
451 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
453 "not vectorized: redundant loop. no profit to vectorize.");
457 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
458 && vect_print_dump_info (REPORT_DETAILS))
460 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
461 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
463 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
464 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
465 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
466 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
467 * vectorization_factor))))
469 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
470 fprintf (vect_dump, "not vectorized: iteration count too small.");
474 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
475 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
476 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
478 if (vect_print_dump_info (REPORT_DETAILS))
479 fprintf (vect_dump, "epilog loop required.");
480 if (!vect_can_advance_ivs_p (loop_vinfo))
482 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
484 "not vectorized: can't create epilog loop 1.");
487 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
489 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
491 "not vectorized: can't create epilog loop 2.");
500 /* Function exist_non_indexing_operands_for_use_p
502 USE is one of the uses attached to STMT. Check if USE is
503 used in STMT for anything other than indexing an array. */
506 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
509 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
511 /* USE corresponds to some operand in STMT. If there is no data
512 reference in STMT, then any operand that corresponds to USE
513 is not indexing an array. */
514 if (!STMT_VINFO_DATA_REF (stmt_info))
517 /* STMT has a data_ref. FORNOW this means that its of one of
521 (This should have been verified in analyze_data_refs).
523 'var' in the second case corresponds to a def, not a use,
524 so USE cannot correspond to any operands that are not used
527 Therefore, all we need to check is if STMT falls into the
528 first case, and whether var corresponds to USE. */
530 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
533 operand = GIMPLE_STMT_OPERAND (stmt, 1);
535 if (TREE_CODE (operand) != SSA_NAME)
545 /* Function vect_analyze_scalar_cycles.
547 Examine the cross iteration def-use cycles of scalar variables, by
548 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
549 following: invariant, induction, reduction, unknown.
551 Some forms of scalar cycles are not yet supported.
553 Example1: reduction: (unsupported yet)
559 Example2: induction: (unsupported yet)
565 Note: the following loop *is* vectorizable:
571 even though it has a def-use cycle caused by the induction variable i:
573 loop: i_2 = PHI (i_0, i_1)
578 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
579 it does not need to be vectorized because it is only used for array
580 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
581 loop2 on the other hand is relevant (it is being written to memory).
585 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
588 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
589 basic_block bb = loop->header;
591 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
593 if (vect_print_dump_info (REPORT_DETAILS))
594 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
596 /* First - identify all inductions. */
597 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
599 tree access_fn = NULL;
600 tree def = PHI_RESULT (phi);
601 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
603 if (vect_print_dump_info (REPORT_DETAILS))
605 fprintf (vect_dump, "Analyze phi: ");
606 print_generic_expr (vect_dump, phi, TDF_SLIM);
609 /* Skip virtual phi's. The data dependences that are associated with
610 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
611 if (!is_gimple_reg (SSA_NAME_VAR (def)))
614 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
616 /* Analyze the evolution function. */
617 access_fn = analyze_scalar_evolution (loop, def);
618 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
620 fprintf (vect_dump, "Access function of PHI: ");
621 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
625 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
627 VEC_safe_push (tree, heap, worklist, phi);
631 if (vect_print_dump_info (REPORT_DETAILS))
632 fprintf (vect_dump, "Detected induction.");
633 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
637 /* Second - identify all reductions. */
638 while (VEC_length (tree, worklist) > 0)
640 tree phi = VEC_pop (tree, worklist);
641 tree def = PHI_RESULT (phi);
642 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
645 if (vect_print_dump_info (REPORT_DETAILS))
647 fprintf (vect_dump, "Analyze phi: ");
648 print_generic_expr (vect_dump, phi, TDF_SLIM);
651 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
652 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
654 reduc_stmt = vect_is_simple_reduction (loop, phi);
657 if (vect_print_dump_info (REPORT_DETAILS))
658 fprintf (vect_dump, "Detected reduction.");
659 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
660 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
664 if (vect_print_dump_info (REPORT_DETAILS))
665 fprintf (vect_dump, "Unknown def-use cycle pattern.");
668 VEC_free (tree, heap, worklist);
673 /* Function vect_insert_into_interleaving_chain.
675 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
678 vect_insert_into_interleaving_chain (struct data_reference *dra,
679 struct data_reference *drb)
681 tree prev, next, next_init;
682 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
683 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
685 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
686 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
689 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
690 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
693 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
694 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
698 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
701 /* We got to the end of the list. Insert here. */
702 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
703 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
707 /* Function vect_update_interleaving_chain.
709 For two data-refs DRA and DRB that are a part of a chain interleaved data
710 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
712 There are four possible cases:
713 1. New stmts - both DRA and DRB are not a part of any chain:
716 2. DRB is a part of a chain and DRA is not:
717 no need to update FIRST_DR
718 no need to insert DRB
719 insert DRA according to init
720 3. DRA is a part of a chain and DRB is not:
721 if (init of FIRST_DR > init of DRB)
723 NEXT(FIRST_DR) = previous FIRST_DR
725 insert DRB according to its init
726 4. both DRA and DRB are in some interleaving chains:
727 choose the chain with the smallest init of FIRST_DR
728 insert the nodes of the second chain into the first one. */
731 vect_update_interleaving_chain (struct data_reference *drb,
732 struct data_reference *dra)
734 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
735 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
736 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
737 tree node, prev, next, node_init, first_stmt;
739 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
740 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
742 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
743 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
744 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
748 /* 2. DRB is a part of a chain and DRA is not. */
749 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
751 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
752 /* Insert DRA into the chain of DRB. */
753 vect_insert_into_interleaving_chain (dra, drb);
757 /* 3. DRA is a part of a chain and DRB is not. */
758 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
760 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
761 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
765 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
767 /* DRB's init is smaller than the init of the stmt previously marked
768 as the first stmt of the interleaving chain of DRA. Therefore, we
769 update FIRST_STMT and put DRB in the head of the list. */
770 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
771 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
773 /* Update all the stmts in the list to point to the new FIRST_STMT. */
774 tmp = old_first_stmt;
777 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
778 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
783 /* Insert DRB in the list of DRA. */
784 vect_insert_into_interleaving_chain (drb, dra);
785 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
790 /* 4. both DRA and DRB are in some interleaving chains. */
791 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
792 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
793 if (first_a == first_b)
795 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
796 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
798 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
800 /* Insert the nodes of DRA chain into the DRB chain.
801 After inserting a node, continue from this node of the DRB chain (don't
802 start from the beginning. */
803 node = DR_GROUP_FIRST_DR (stmtinfo_a);
804 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
805 first_stmt = first_b;
809 /* Insert the nodes of DRB chain into the DRA chain.
810 After inserting a node, continue from this node of the DRA chain (don't
811 start from the beginning. */
812 node = DR_GROUP_FIRST_DR (stmtinfo_b);
813 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
814 first_stmt = first_a;
819 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
820 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
823 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
824 if (tree_int_cst_compare (next_init, node_init) > 0)
827 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
828 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
833 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
837 /* We got to the end of the list. Insert here. */
838 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
839 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
842 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
843 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
848 /* Function vect_equal_offsets.
850 Check if OFFSET1 and OFFSET2 are identical expressions. */
853 vect_equal_offsets (tree offset1, tree offset2)
857 STRIP_NOPS (offset1);
858 STRIP_NOPS (offset2);
860 if (offset1 == offset2)
863 if (TREE_CODE (offset1) != TREE_CODE (offset2)
864 || !BINARY_CLASS_P (offset1)
865 || !BINARY_CLASS_P (offset2))
868 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
869 TREE_OPERAND (offset2, 0));
870 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
871 TREE_OPERAND (offset2, 1));
873 return (res0 && res1);
877 /* Function vect_check_interleaving.
879 Check if DRA and DRB are a part of interleaving. In case they are, insert
880 DRA and DRB in an interleaving chain. */
883 vect_check_interleaving (struct data_reference *dra,
884 struct data_reference *drb)
886 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
888 /* Check that the data-refs have same first location (except init) and they
889 are both either store or load (not load and store). */
890 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
891 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
892 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
893 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
894 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
895 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
896 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
897 || DR_IS_READ (dra) != DR_IS_READ (drb))
901 1. data-refs are of the same type
902 2. their steps are equal
903 3. the step is greater than the difference between data-refs' inits */
904 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
905 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
907 if (type_size_a != type_size_b
908 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
911 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
912 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
913 step = TREE_INT_CST_LOW (DR_STEP (dra));
917 /* If init_a == init_b + the size of the type * k, we have an interleaving,
918 and DRB is accessed before DRA. */
919 diff_mod_size = (init_a - init_b) % type_size_a;
921 if ((init_a - init_b) > step)
924 if (diff_mod_size == 0)
926 vect_update_interleaving_chain (drb, dra);
927 if (vect_print_dump_info (REPORT_DR_DETAILS))
929 fprintf (vect_dump, "Detected interleaving ");
930 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
931 fprintf (vect_dump, " and ");
932 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
939 /* If init_b == init_a + the size of the type * k, we have an
940 interleaving, and DRA is accessed before DRB. */
941 diff_mod_size = (init_b - init_a) % type_size_a;
943 if ((init_b - init_a) > step)
946 if (diff_mod_size == 0)
948 vect_update_interleaving_chain (dra, drb);
949 if (vect_print_dump_info (REPORT_DR_DETAILS))
951 fprintf (vect_dump, "Detected interleaving ");
952 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
953 fprintf (vect_dump, " and ");
954 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
962 /* Function vect_analyze_data_ref_dependence.
964 Return TRUE if there (might) exist a dependence between a memory-reference
965 DRA and a memory-reference DRB. */
968 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
969 loop_vec_info loop_vinfo)
972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
973 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
974 struct data_reference *dra = DDR_A (ddr);
975 struct data_reference *drb = DDR_B (ddr);
976 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
977 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
978 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
979 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
980 lambda_vector dist_v;
981 unsigned int loop_depth;
983 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
985 /* Independent data accesses. */
986 vect_check_interleaving (dra, drb);
990 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
993 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
995 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
998 "not vectorized: can't determine dependence between ");
999 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1000 fprintf (vect_dump, " and ");
1001 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1006 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1008 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1010 fprintf (vect_dump, "not vectorized: bad dist vector for ");
1011 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1012 fprintf (vect_dump, " and ");
1013 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1018 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1019 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1021 int dist = dist_v[loop_depth];
1023 if (vect_print_dump_info (REPORT_DR_DETAILS))
1024 fprintf (vect_dump, "dependence distance = %d.", dist);
1026 /* Same loop iteration. */
1027 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1029 /* Two references with distance zero have the same alignment. */
1030 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1031 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1032 if (vect_print_dump_info (REPORT_ALIGNMENT))
1033 fprintf (vect_dump, "accesses have the same alignment.");
1034 if (vect_print_dump_info (REPORT_DR_DETAILS))
1036 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1037 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1038 fprintf (vect_dump, " and ");
1039 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1042 /* For interleaving, mark that there is a read-write dependency if
1043 necessary. We check before that one of the data-refs is store. */
1044 if (DR_IS_READ (dra))
1045 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1048 if (DR_IS_READ (drb))
1049 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1055 if (abs (dist) >= vectorization_factor)
1057 /* Dependence distance does not create dependence, as far as vectorization
1058 is concerned, in this case. */
1059 if (vect_print_dump_info (REPORT_DR_DETAILS))
1060 fprintf (vect_dump, "dependence distance >= VF.");
1064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1067 "not vectorized: possible dependence between data-refs ");
1068 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1069 fprintf (vect_dump, " and ");
1070 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1080 /* Function vect_analyze_data_ref_dependences.
1082 Examine all the data references in the loop, and make sure there do not
1083 exist any data dependences between them. */
1086 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1089 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1090 struct data_dependence_relation *ddr;
1092 if (vect_print_dump_info (REPORT_DETAILS))
1093 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1095 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1096 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1103 /* Function vect_compute_data_ref_alignment
1105 Compute the misalignment of the data reference DR.
1108 1. If during the misalignment computation it is found that the data reference
1109 cannot be vectorized then false is returned.
1110 2. DR_MISALIGNMENT (DR) is defined.
1112 FOR NOW: No analysis is actually performed. Misalignment is calculated
1113 only for trivial cases. TODO. */
1116 vect_compute_data_ref_alignment (struct data_reference *dr)
1118 tree stmt = DR_STMT (dr);
1119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1120 tree ref = DR_REF (dr);
1122 tree base, base_addr;
1125 tree aligned_to, alignment;
1127 if (vect_print_dump_info (REPORT_DETAILS))
1128 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1130 /* Initialize misalignment to unknown. */
1131 SET_DR_MISALIGNMENT (dr, -1);
1133 misalign = DR_INIT (dr);
1134 aligned_to = DR_ALIGNED_TO (dr);
1135 base_addr = DR_BASE_ADDRESS (dr);
1136 base = build_fold_indirect_ref (base_addr);
1137 vectype = STMT_VINFO_VECTYPE (stmt_info);
1138 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1140 if (tree_int_cst_compare (aligned_to, alignment) < 0)
1142 if (vect_print_dump_info (REPORT_DETAILS))
1144 fprintf (vect_dump, "Unknown alignment for access: ");
1145 print_generic_expr (vect_dump, base, TDF_SLIM);
1151 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1153 || (TREE_CODE (base_addr) == SSA_NAME
1154 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1155 TREE_TYPE (base_addr)))),
1157 base_aligned = true;
1159 base_aligned = false;
1163 /* Do not change the alignment of global variables if
1164 flag_section_anchors is enabled. */
1165 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1166 || (TREE_STATIC (base) && flag_section_anchors))
1168 if (vect_print_dump_info (REPORT_DETAILS))
1170 fprintf (vect_dump, "can't force alignment of ref: ");
1171 print_generic_expr (vect_dump, ref, TDF_SLIM);
1176 /* Force the alignment of the decl.
1177 NOTE: This is the only change to the code we make during
1178 the analysis phase, before deciding to vectorize the loop. */
1179 if (vect_print_dump_info (REPORT_DETAILS))
1180 fprintf (vect_dump, "force alignment");
1181 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1182 DECL_USER_ALIGN (base) = 1;
1185 /* At this point we assume that the base is aligned. */
1186 gcc_assert (base_aligned
1187 || (TREE_CODE (base) == VAR_DECL
1188 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1190 /* Modulo alignment. */
1191 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1193 if (!host_integerp (misalign, 1))
1195 /* Negative or overflowed misalignment value. */
1196 if (vect_print_dump_info (REPORT_DETAILS))
1197 fprintf (vect_dump, "unexpected misalign value");
1201 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
1203 if (vect_print_dump_info (REPORT_DETAILS))
1205 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1206 print_generic_expr (vect_dump, ref, TDF_SLIM);
1213 /* Function vect_compute_data_refs_alignment
1215 Compute the misalignment of data references in the loop.
1216 Return FALSE if a data reference is found that cannot be vectorized. */
1219 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1221 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1222 struct data_reference *dr;
1225 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1226 if (!vect_compute_data_ref_alignment (dr))
1233 /* Function vect_update_misalignment_for_peel
1235 DR - the data reference whose misalignment is to be adjusted.
1236 DR_PEEL - the data reference whose misalignment is being made
1237 zero in the vector loop by the peel.
1238 NPEEL - the number of iterations in the peel loop if the misalignment
1239 of DR_PEEL is known at compile time. */
1242 vect_update_misalignment_for_peel (struct data_reference *dr,
1243 struct data_reference *dr_peel, int npeel)
1246 VEC(dr_p,heap) *same_align_drs;
1247 struct data_reference *current_dr;
1248 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1249 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1250 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1251 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1253 /* For interleaved data accesses the step in the loop must be multiplied by
1254 the size of the interleaving group. */
1255 if (DR_GROUP_FIRST_DR (stmt_info))
1256 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1257 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1258 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1260 /* It can be assumed that the data refs with the same alignment as dr_peel
1261 are aligned in the vector loop. */
1263 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1264 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1266 if (current_dr != dr)
1268 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1269 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1270 SET_DR_MISALIGNMENT (dr, 0);
1274 if (known_alignment_for_access_p (dr)
1275 && known_alignment_for_access_p (dr_peel))
1277 int misal = DR_MISALIGNMENT (dr);
1278 misal += npeel * dr_size;
1279 misal %= UNITS_PER_SIMD_WORD;
1280 SET_DR_MISALIGNMENT (dr, misal);
1284 if (vect_print_dump_info (REPORT_DETAILS))
1285 fprintf (vect_dump, "Setting misalignment to -1.");
1286 SET_DR_MISALIGNMENT (dr, -1);
1290 /* Function vect_verify_datarefs_alignment
1292 Return TRUE if all data references in the loop can be
1293 handled with respect to alignment. */
1296 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1298 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1299 struct data_reference *dr;
1300 enum dr_alignment_support supportable_dr_alignment;
1303 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1305 tree stmt = DR_STMT (dr);
1306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1308 /* For interleaving, only the alignment of the first access matters. */
1309 if (DR_GROUP_FIRST_DR (stmt_info)
1310 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1313 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1314 if (!supportable_dr_alignment)
1316 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1318 if (DR_IS_READ (dr))
1320 "not vectorized: unsupported unaligned load.");
1323 "not vectorized: unsupported unaligned store.");
1327 if (supportable_dr_alignment != dr_aligned
1328 && vect_print_dump_info (REPORT_ALIGNMENT))
1329 fprintf (vect_dump, "Vectorizing an unaligned access.");
1335 /* Function vect_enhance_data_refs_alignment
1337 This pass will use loop versioning and loop peeling in order to enhance
1338 the alignment of data references in the loop.
1340 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1341 original loop is to be vectorized; Any other loops that are created by
1342 the transformations performed in this pass - are not supposed to be
1343 vectorized. This restriction will be relaxed.
1345 This pass will require a cost model to guide it whether to apply peeling
1346 or versioning or a combination of the two. For example, the scheme that
1347 intel uses when given a loop with several memory accesses, is as follows:
1348 choose one memory access ('p') which alignment you want to force by doing
1349 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1350 other accesses are not necessarily aligned, or (2) use loop versioning to
1351 generate one loop in which all accesses are aligned, and another loop in
1352 which only 'p' is necessarily aligned.
1354 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1355 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1356 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1358 Devising a cost model is the most critical aspect of this work. It will
1359 guide us on which access to peel for, whether to use loop versioning, how
1360 many versions to create, etc. The cost model will probably consist of
1361 generic considerations as well as target specific considerations (on
1362 powerpc for example, misaligned stores are more painful than misaligned
1365 Here are the general steps involved in alignment enhancements:
1367 -- original loop, before alignment analysis:
1368 for (i=0; i<N; i++){
1369 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1370 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1373 -- After vect_compute_data_refs_alignment:
1374 for (i=0; i<N; i++){
1375 x = q[i]; # DR_MISALIGNMENT(q) = 3
1376 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1379 -- Possibility 1: we do loop versioning:
1381 for (i=0; i<N; i++){ # loop 1A
1382 x = q[i]; # DR_MISALIGNMENT(q) = 3
1383 p[i] = y; # DR_MISALIGNMENT(p) = 0
1387 for (i=0; i<N; i++){ # loop 1B
1388 x = q[i]; # DR_MISALIGNMENT(q) = 3
1389 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1393 -- Possibility 2: we do loop peeling:
1394 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1398 for (i = 3; i < N; i++){ # loop 2A
1399 x = q[i]; # DR_MISALIGNMENT(q) = 0
1400 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1403 -- Possibility 3: combination of loop peeling and versioning:
1404 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1409 for (i = 3; i<N; i++){ # loop 3A
1410 x = q[i]; # DR_MISALIGNMENT(q) = 0
1411 p[i] = y; # DR_MISALIGNMENT(p) = 0
1415 for (i = 3; i<N; i++){ # loop 3B
1416 x = q[i]; # DR_MISALIGNMENT(q) = 0
1417 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1421 These loops are later passed to loop_transform to be vectorized. The
1422 vectorizer will use the alignment information to guide the transformation
1423 (whether to generate regular loads/stores, or with special handling for
1427 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1429 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1431 enum dr_alignment_support supportable_dr_alignment;
1432 struct data_reference *dr0 = NULL;
1433 struct data_reference *dr;
1435 bool do_peeling = false;
1436 bool do_versioning = false;
1439 stmt_vec_info stmt_info;
1441 if (vect_print_dump_info (REPORT_DETAILS))
1442 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1444 /* While cost model enhancements are expected in the future, the high level
1445 view of the code at this time is as follows:
1447 A) If there is a misaligned write then see if peeling to align this write
1448 can make all data references satisfy vect_supportable_dr_alignment.
1449 If so, update data structures as needed and return true. Note that
1450 at this time vect_supportable_dr_alignment is known to return false
1451 for a misaligned write.
1453 B) If peeling wasn't possible and there is a data reference with an
1454 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1455 then see if loop versioning checks can be used to make all data
1456 references satisfy vect_supportable_dr_alignment. If so, update
1457 data structures as needed and return true.
1459 C) If neither peeling nor versioning were successful then return false if
1460 any data reference does not satisfy vect_supportable_dr_alignment.
1462 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1464 Note, Possibility 3 above (which is peeling and versioning together) is not
1465 being done at this time. */
1467 /* (1) Peeling to force alignment. */
1469 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1471 + How many accesses will become aligned due to the peeling
1472 - How many accesses will become unaligned due to the peeling,
1473 and the cost of misaligned accesses.
1474 - The cost of peeling (the extra runtime checks, the increase
1477 The scheme we use FORNOW: peel to force the alignment of the first
1478 misaligned store in the loop.
1479 Rationale: misaligned stores are not yet supported.
1481 TODO: Use a cost model. */
1483 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1485 stmt = DR_STMT (dr);
1486 stmt_info = vinfo_for_stmt (stmt);
1488 /* For interleaving, only the alignment of the first access
1490 if (DR_GROUP_FIRST_DR (stmt_info)
1491 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1494 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1496 if (DR_GROUP_FIRST_DR (stmt_info))
1498 /* For interleaved access we peel only if number of iterations in
1499 the prolog loop ({VF - misalignment}), is a multiple of the
1500 number of the interleaved accesses. */
1501 int elem_size, mis_in_elements;
1502 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1503 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1505 /* FORNOW: handle only known alignment. */
1506 if (!known_alignment_for_access_p (dr))
1512 elem_size = UNITS_PER_SIMD_WORD / nelements;
1513 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1515 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1527 /* Often peeling for alignment will require peeling for loop-bound, which in
1528 turn requires that we know how to adjust the loop ivs after the loop. */
1529 if (!vect_can_advance_ivs_p (loop_vinfo)
1530 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1537 tree stmt = DR_STMT (dr0);
1538 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1539 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1540 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1542 if (known_alignment_for_access_p (dr0))
1544 /* Since it's known at compile time, compute the number of iterations
1545 in the peeled loop (the peeling factor) for use in updating
1546 DR_MISALIGNMENT values. The peeling factor is the vectorization
1547 factor minus the misalignment as an element count. */
1548 mis = DR_MISALIGNMENT (dr0);
1549 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1550 npeel = nelements - mis;
1552 /* For interleaved data access every iteration accesses all the
1553 members of the group, therefore we divide the number of iterations
1554 by the group size. */
1555 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1556 if (DR_GROUP_FIRST_DR (stmt_info))
1557 npeel /= DR_GROUP_SIZE (stmt_info);
1559 if (vect_print_dump_info (REPORT_DETAILS))
1560 fprintf (vect_dump, "Try peeling by %d", npeel);
1563 /* Ensure that all data refs can be vectorized after the peel. */
1564 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1566 int save_misalignment;
1571 stmt = DR_STMT (dr);
1572 stmt_info = vinfo_for_stmt (stmt);
1573 /* For interleaving, only the alignment of the first access
1575 if (DR_GROUP_FIRST_DR (stmt_info)
1576 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1579 save_misalignment = DR_MISALIGNMENT (dr);
1580 vect_update_misalignment_for_peel (dr, dr0, npeel);
1581 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1582 SET_DR_MISALIGNMENT (dr, save_misalignment);
1584 if (!supportable_dr_alignment)
1593 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1594 If the misalignment of DR_i is identical to that of dr0 then set
1595 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1596 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1597 by the peeling factor times the element size of DR_i (MOD the
1598 vectorization factor times the size). Otherwise, the
1599 misalignment of DR_i must be set to unknown. */
1600 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1602 vect_update_misalignment_for_peel (dr, dr0, npeel);
1604 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1605 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1606 SET_DR_MISALIGNMENT (dr0, 0);
1607 if (vect_print_dump_info (REPORT_ALIGNMENT))
1608 fprintf (vect_dump, "Alignment of access forced using peeling.");
1610 if (vect_print_dump_info (REPORT_DETAILS))
1611 fprintf (vect_dump, "Peeling for alignment will be applied.");
1613 stat = vect_verify_datarefs_alignment (loop_vinfo);
1620 /* (2) Versioning to force alignment. */
1622 /* Try versioning if:
1623 1) flag_tree_vect_loop_version is TRUE
1624 2) optimize_size is FALSE
1625 3) there is at least one unsupported misaligned data ref with an unknown
1627 4) all misaligned data refs with a known misalignment are supported, and
1628 5) the number of runtime alignment checks is within reason. */
1630 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1634 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1636 stmt = DR_STMT (dr);
1637 stmt_info = vinfo_for_stmt (stmt);
1639 /* For interleaving, only the alignment of the first access
1641 if (aligned_access_p (dr)
1642 || (DR_GROUP_FIRST_DR (stmt_info)
1643 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1646 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1648 if (!supportable_dr_alignment)
1654 if (known_alignment_for_access_p (dr)
1655 || VEC_length (tree,
1656 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1657 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1659 do_versioning = false;
1663 stmt = DR_STMT (dr);
1664 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1665 gcc_assert (vectype);
1667 /* The rightmost bits of an aligned address must be zeros.
1668 Construct the mask needed for this test. For example,
1669 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1670 mask must be 15 = 0xf. */
1671 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1673 /* FORNOW: use the same mask to test all potentially unaligned
1674 references in the loop. The vectorizer currently supports
1675 a single vector size, see the reference to
1676 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1677 vectorization factor is computed. */
1678 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1679 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1680 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1681 VEC_safe_push (tree, heap,
1682 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1687 /* Versioning requires at least one misaligned data reference. */
1688 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1689 do_versioning = false;
1690 else if (!do_versioning)
1691 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1696 VEC(tree,heap) *may_misalign_stmts
1697 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1700 /* It can now be assumed that the data references in the statements
1701 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1702 of the loop being vectorized. */
1703 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1705 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1706 dr = STMT_VINFO_DATA_REF (stmt_info);
1707 SET_DR_MISALIGNMENT (dr, 0);
1708 if (vect_print_dump_info (REPORT_ALIGNMENT))
1709 fprintf (vect_dump, "Alignment of access forced using versioning.");
1712 if (vect_print_dump_info (REPORT_DETAILS))
1713 fprintf (vect_dump, "Versioning for alignment will be applied.");
1715 /* Peeling and versioning can't be done together at this time. */
1716 gcc_assert (! (do_peeling && do_versioning));
1718 stat = vect_verify_datarefs_alignment (loop_vinfo);
1723 /* This point is reached if neither peeling nor versioning is being done. */
1724 gcc_assert (! (do_peeling || do_versioning));
1726 stat = vect_verify_datarefs_alignment (loop_vinfo);
1731 /* Function vect_analyze_data_refs_alignment
1733 Analyze the alignment of the data-references in the loop.
1734 Return FALSE if a data reference is found that cannot be vectorized. */
1737 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1739 if (vect_print_dump_info (REPORT_DETAILS))
1740 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1742 if (!vect_compute_data_refs_alignment (loop_vinfo))
1744 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1746 "not vectorized: can't calculate alignment for data ref.");
1754 /* Function vect_analyze_data_ref_access.
1756 Analyze the access pattern of the data-reference DR. For now, a data access
1757 has to be consecutive to be considered vectorizable. */
1760 vect_analyze_data_ref_access (struct data_reference *dr)
1762 tree step = DR_STEP (dr);
1763 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1764 tree scalar_type = TREE_TYPE (DR_REF (dr));
1765 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1766 tree stmt = DR_STMT (dr);
1767 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1768 interleaving group (including gaps). */
1769 HOST_WIDE_INT stride = dr_step / type_size;
1773 if (vect_print_dump_info (REPORT_DETAILS))
1774 fprintf (vect_dump, "bad data-ref access");
1779 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1781 /* Mark that it is not interleaving. */
1782 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1786 /* Not consecutive access is possible only if it is a part of interleaving. */
1787 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1789 /* Check if it this DR is a part of interleaving, and is a single
1790 element of the group that is accessed in the loop. */
1792 /* Gaps are supported only for loads. STEP must be a multiple of the type
1793 size. The size of the group must be a power of 2. */
1795 && (dr_step % type_size) == 0
1797 && exact_log2 (stride) != -1)
1799 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1800 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1801 if (vect_print_dump_info (REPORT_DR_DETAILS))
1803 fprintf (vect_dump, "Detected single element interleaving %d ",
1804 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1805 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1806 fprintf (vect_dump, " step ");
1807 print_generic_expr (vect_dump, step, TDF_SLIM);
1811 if (vect_print_dump_info (REPORT_DETAILS))
1812 fprintf (vect_dump, "not consecutive access");
1816 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1818 /* First stmt in the interleaving chain. Check the chain. */
1819 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1820 struct data_reference *data_ref = dr;
1821 unsigned int count = 1;
1823 tree prev_init = DR_INIT (data_ref);
1825 HOST_WIDE_INT diff, count_in_bytes;
1829 /* Skip same data-refs. In case that two or more stmts share data-ref
1830 (supported only for loads), we vectorize only the first stmt, and
1831 the rest get their vectorized loads from the first one. */
1832 if (!tree_int_cst_compare (DR_INIT (data_ref),
1833 DR_INIT (STMT_VINFO_DATA_REF (
1834 vinfo_for_stmt (next)))))
1836 if (!DR_IS_READ (data_ref))
1838 if (vect_print_dump_info (REPORT_DETAILS))
1839 fprintf (vect_dump, "Two store stmts share the same dr.");
1843 /* Check that there is no load-store dependencies for this loads
1844 to prevent a case of load-store-load to the same location. */
1845 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1846 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1848 if (vect_print_dump_info (REPORT_DETAILS))
1850 "READ_WRITE dependence in interleaving.");
1854 /* For load use the same data-ref load. */
1855 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1858 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1863 /* Check that all the accesses have the same STEP. */
1864 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1865 if (tree_int_cst_compare (step, next_step))
1867 if (vect_print_dump_info (REPORT_DETAILS))
1868 fprintf (vect_dump, "not consecutive access in interleaving");
1872 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1873 /* Check that the distance between two accesses is equal to the type
1874 size. Otherwise, we have gaps. */
1875 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1876 - TREE_INT_CST_LOW (prev_init)) / type_size;
1877 if (!DR_IS_READ (data_ref) && diff != 1)
1879 if (vect_print_dump_info (REPORT_DETAILS))
1880 fprintf (vect_dump, "interleaved store with gaps");
1883 /* Store the gap from the previous member of the group. If there is no
1884 gap in the access, DR_GROUP_GAP is always 1. */
1885 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1887 prev_init = DR_INIT (data_ref);
1888 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1889 /* Count the number of data-refs in the chain. */
1893 /* COUNT is the number of accesses found, we multiply it by the size of
1894 the type to get COUNT_IN_BYTES. */
1895 count_in_bytes = type_size * count;
1897 /* Check that the size of the interleaving is not greater than STEP. */
1898 if (dr_step < count_in_bytes)
1900 if (vect_print_dump_info (REPORT_DETAILS))
1902 fprintf (vect_dump, "interleaving size is greater than step for ");
1903 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1908 /* Check that the size of the interleaving is equal to STEP for stores,
1909 i.e., that there are no gaps. */
1910 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1912 if (vect_print_dump_info (REPORT_DETAILS))
1913 fprintf (vect_dump, "interleaved store with gaps");
1917 /* Check that STEP is a multiple of type size. */
1918 if ((dr_step % type_size) != 0)
1920 if (vect_print_dump_info (REPORT_DETAILS))
1922 fprintf (vect_dump, "step is not a multiple of type size: step ");
1923 print_generic_expr (vect_dump, step, TDF_SLIM);
1924 fprintf (vect_dump, " size ");
1925 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1931 /* FORNOW: we handle only interleaving that is a power of 2. */
1932 if (exact_log2 (stride) == -1)
1934 if (vect_print_dump_info (REPORT_DETAILS))
1935 fprintf (vect_dump, "interleaving is not a power of 2");
1938 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1944 /* Function vect_analyze_data_ref_accesses.
1946 Analyze the access pattern of all the data references in the loop.
1948 FORNOW: the only access pattern that is considered vectorizable is a
1949 simple step 1 (consecutive) access.
1951 FORNOW: handle only arrays and pointer accesses. */
1954 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1957 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1958 struct data_reference *dr;
1960 if (vect_print_dump_info (REPORT_DETAILS))
1961 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1963 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1964 if (!vect_analyze_data_ref_access (dr))
1966 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1967 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1975 /* Function vect_analyze_data_refs.
1977 Find all the data references in the loop.
1979 The general structure of the analysis of data refs in the vectorizer is as
1981 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1982 find and analyze all data-refs in the loop and their dependences.
1983 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1984 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1985 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1990 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1992 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1994 VEC (data_reference_p, heap) *datarefs;
1995 struct data_reference *dr;
1998 if (vect_print_dump_info (REPORT_DETAILS))
1999 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2001 compute_data_dependences_for_loop (loop, true,
2002 &LOOP_VINFO_DATAREFS (loop_vinfo),
2003 &LOOP_VINFO_DDRS (loop_vinfo));
2005 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2006 from stmt_vec_info struct to DR and vectype. */
2007 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2009 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2012 stmt_vec_info stmt_info;
2014 if (!dr || !DR_REF (dr))
2016 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2017 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2021 /* Update DR field in stmt_vec_info struct. */
2022 stmt = DR_STMT (dr);
2023 stmt_info = vinfo_for_stmt (stmt);
2025 if (STMT_VINFO_DATA_REF (stmt_info))
2027 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2030 "not vectorized: more than one data ref in stmt: ");
2031 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2035 STMT_VINFO_DATA_REF (stmt_info) = dr;
2037 /* Check that analysis of the data-ref succeeded. */
2038 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2041 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2043 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2044 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2048 if (!DR_SYMBOL_TAG (dr))
2050 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2052 fprintf (vect_dump, "not vectorized: no memory tag for ");
2053 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2058 /* Set vectype for STMT. */
2059 scalar_type = TREE_TYPE (DR_REF (dr));
2060 STMT_VINFO_VECTYPE (stmt_info) =
2061 get_vectype_for_scalar_type (scalar_type);
2062 if (!STMT_VINFO_VECTYPE (stmt_info))
2064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2067 "not vectorized: no vectype for stmt: ");
2068 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2069 fprintf (vect_dump, " scalar_type: ");
2070 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2080 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2082 /* Function vect_mark_relevant.
2084 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2087 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2088 enum vect_relevant relevant, bool live_p)
2090 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2091 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2092 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2094 if (vect_print_dump_info (REPORT_DETAILS))
2095 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2097 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2101 /* This is the last stmt in a sequence that was detected as a
2102 pattern that can potentially be vectorized. Don't mark the stmt
2103 as relevant/live because it's not going to vectorized.
2104 Instead mark the pattern-stmt that replaces it. */
2105 if (vect_print_dump_info (REPORT_DETAILS))
2106 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2107 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2108 stmt_info = vinfo_for_stmt (pattern_stmt);
2109 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2110 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2111 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2112 stmt = pattern_stmt;
2115 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2116 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2117 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2119 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2120 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2122 if (vect_print_dump_info (REPORT_DETAILS))
2123 fprintf (vect_dump, "already marked relevant/live.");
2127 VEC_safe_push (tree, heap, *worklist, stmt);
2131 /* Function vect_stmt_relevant_p.
2133 Return true if STMT in loop that is represented by LOOP_VINFO is
2134 "relevant for vectorization".
2136 A stmt is considered "relevant for vectorization" if:
2137 - it has uses outside the loop.
2138 - it has vdefs (it alters memory).
2139 - control stmts in the loop (except for the exit condition).
2141 CHECKME: what other side effects would the vectorizer allow? */
2144 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2145 enum vect_relevant *relevant, bool *live_p)
2147 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2148 ssa_op_iter op_iter;
2149 imm_use_iterator imm_iter;
2150 use_operand_p use_p;
2151 def_operand_p def_p;
2153 *relevant = vect_unused_in_loop;
2156 /* cond stmt other than loop exit cond. */
2157 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2158 *relevant = vect_used_in_loop;
2160 /* changing memory. */
2161 if (TREE_CODE (stmt) != PHI_NODE)
2162 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2164 if (vect_print_dump_info (REPORT_DETAILS))
2165 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2166 *relevant = vect_used_in_loop;
2169 /* uses outside the loop. */
2170 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2172 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2174 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2175 if (!flow_bb_inside_loop_p (loop, bb))
2177 if (vect_print_dump_info (REPORT_DETAILS))
2178 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2180 /* We expect all such uses to be in the loop exit phis
2181 (because of loop closed form) */
2182 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2183 gcc_assert (bb == single_exit (loop)->dest);
2190 return (*live_p || *relevant);
2195 Function process_use.
2198 - a USE in STMT in a loop represented by LOOP_VINFO
2199 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
2200 that defined USE. This is dont by calling mark_relevant and passing it
2201 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
2204 Generally, LIVE_P and RELEVANT are used to define the liveness and
2205 relevance info of the DEF_STMT of this USE:
2206 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2207 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2209 - case 1: If USE is used only for address computations (e.g. array indexing),
2210 which does not need to be directly vectorized, then the liveness/relevance
2211 of the respective DEF_STMT is left unchanged.
2212 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
2213 skip DEF_STMT cause it had already been processed.
2215 Return true if everything is as expected. Return false otherwise. */
2218 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
2219 enum vect_relevant relevant, VEC(tree,heap) **worklist)
2221 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2222 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2223 stmt_vec_info dstmt_vinfo;
2226 enum vect_def_type dt;
2228 /* case 1: we are only interested in uses that need to be vectorized. Uses
2229 that are used for address computation are not considered relevant. */
2230 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2233 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2235 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2236 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2240 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2243 def_bb = bb_for_stmt (def_stmt);
2244 if (!flow_bb_inside_loop_p (loop, def_bb))
2247 /* case 2: A reduction phi defining a reduction stmt (DEF_STMT). DEF_STMT
2248 must have already been processed, so we just check that everything is as
2249 expected, and we are done. */
2250 dstmt_vinfo = vinfo_for_stmt (def_stmt);
2251 if (TREE_CODE (stmt) == PHI_NODE
2252 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2253 && TREE_CODE (def_stmt) != PHI_NODE
2254 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def)
2256 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
2257 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
2258 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
2259 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
2260 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
2264 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
2269 /* Function vect_mark_stmts_to_be_vectorized.
2271 Not all stmts in the loop need to be vectorized. For example:
2280 Stmt 1 and 3 do not need to be vectorized, because loop control and
2281 addressing of vectorized data-refs are handled differently.
2283 This pass detects such stmts. */
2286 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2288 VEC(tree,heap) *worklist;
2289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2290 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2291 unsigned int nbbs = loop->num_nodes;
2292 block_stmt_iterator si;
2296 stmt_vec_info stmt_vinfo;
2300 enum vect_relevant relevant;
2302 if (vect_print_dump_info (REPORT_DETAILS))
2303 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2305 worklist = VEC_alloc (tree, heap, 64);
2307 /* 1. Init worklist. */
2308 for (i = 0; i < nbbs; i++)
2311 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2313 if (vect_print_dump_info (REPORT_DETAILS))
2315 fprintf (vect_dump, "init: phi relevant? ");
2316 print_generic_expr (vect_dump, phi, TDF_SLIM);
2319 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2320 vect_mark_relevant (&worklist, phi, relevant, live_p);
2322 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2324 stmt = bsi_stmt (si);
2325 if (vect_print_dump_info (REPORT_DETAILS))
2327 fprintf (vect_dump, "init: stmt relevant? ");
2328 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2331 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2332 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2336 /* 2. Process_worklist */
2337 while (VEC_length (tree, worklist) > 0)
2339 use_operand_p use_p;
2342 stmt = VEC_pop (tree, worklist);
2343 if (vect_print_dump_info (REPORT_DETAILS))
2345 fprintf (vect_dump, "worklist: examine stmt: ");
2346 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2349 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
2350 (DEF_STMT) as relevant/irrelevant and live/dead according to the
2351 liveness and relevance properties of STMT. */
2352 ann = stmt_ann (stmt);
2353 stmt_vinfo = vinfo_for_stmt (stmt);
2354 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2355 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2357 /* Generally, the liveness and relevance properties of STMT are
2358 propagated as is to the DEF_STMTs of its USEs:
2359 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
2360 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
2362 One exception is when STMT has been identified as defining a reduction
2363 variable; in this case we set the liveness/relevance as follows:
2365 relevant = vect_used_by_reduction
2366 This is because we distinguish between two kinds of relevant stmts -
2367 those that are used by a reduction computation, and those that are
2368 (also) used by a regular computation. This allows us later on to
2369 identify stmts that are used solely by a reduction, and therefore the
2370 order of the results that they produce does not have to be kept.
2372 Reduction phis are expected to be used by a reduction stmt; Other
2373 reduction stmts are expected to be unused in the loop. These are the
2374 expected values of "relevant" for reduction phis/stmts in the loop:
2377 vect_unused_in_loop ok
2378 vect_used_by_reduction ok
2379 vect_used_in_loop */
2381 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2385 case vect_unused_in_loop:
2386 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2388 case vect_used_by_reduction:
2389 if (TREE_CODE (stmt) == PHI_NODE)
2391 case vect_used_in_loop:
2393 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "unsupported use of reduction.");
2395 VEC_free (tree, heap, worklist);
2398 relevant = vect_used_by_reduction;
2402 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2404 tree op = USE_FROM_PTR (use_p);
2405 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
2407 VEC_free (tree, heap, worklist);
2411 } /* while worklist */
2413 VEC_free (tree, heap, worklist);
2418 /* Function vect_can_advance_ivs_p
2420 In case the number of iterations that LOOP iterates is unknown at compile
2421 time, an epilog loop will be generated, and the loop induction variables
2422 (IVs) will be "advanced" to the value they are supposed to take just before
2423 the epilog loop. Here we check that the access function of the loop IVs
2424 and the expression that represents the loop bound are simple enough.
2425 These restrictions will be relaxed in the future. */
2428 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2430 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2431 basic_block bb = loop->header;
2434 /* Analyze phi functions of the loop header. */
2436 if (vect_print_dump_info (REPORT_DETAILS))
2437 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2439 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2441 tree access_fn = NULL;
2442 tree evolution_part;
2444 if (vect_print_dump_info (REPORT_DETAILS))
2446 fprintf (vect_dump, "Analyze phi: ");
2447 print_generic_expr (vect_dump, phi, TDF_SLIM);
2450 /* Skip virtual phi's. The data dependences that are associated with
2451 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2453 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2455 if (vect_print_dump_info (REPORT_DETAILS))
2456 fprintf (vect_dump, "virtual phi. skip.");
2460 /* Skip reduction phis. */
2462 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2464 if (vect_print_dump_info (REPORT_DETAILS))
2465 fprintf (vect_dump, "reduc phi. skip.");
2469 /* Analyze the evolution function. */
2471 access_fn = instantiate_parameters
2472 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2476 if (vect_print_dump_info (REPORT_DETAILS))
2477 fprintf (vect_dump, "No Access function.");
2481 if (vect_print_dump_info (REPORT_DETAILS))
2483 fprintf (vect_dump, "Access function of PHI: ");
2484 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2487 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2489 if (evolution_part == NULL_TREE)
2491 if (vect_print_dump_info (REPORT_DETAILS))
2492 fprintf (vect_dump, "No evolution.");
2496 /* FORNOW: We do not transform initial conditions of IVs
2497 which evolution functions are a polynomial of degree >= 2. */
2499 if (tree_is_chrec (evolution_part))
2507 /* Function vect_get_loop_niters.
2509 Determine how many iterations the loop is executed.
2510 If an expression that represents the number of iterations
2511 can be constructed, place it in NUMBER_OF_ITERATIONS.
2512 Return the loop exit condition. */
2515 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2519 if (vect_print_dump_info (REPORT_DETAILS))
2520 fprintf (vect_dump, "=== get_loop_niters ===");
2522 niters = number_of_exit_cond_executions (loop);
2524 if (niters != NULL_TREE
2525 && niters != chrec_dont_know)
2527 *number_of_iterations = niters;
2529 if (vect_print_dump_info (REPORT_DETAILS))
2531 fprintf (vect_dump, "==> get_loop_niters:" );
2532 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2536 return get_loop_exit_condition (loop);
2540 /* Function vect_analyze_loop_form.
2542 Verify the following restrictions (some may be relaxed in the future):
2543 - it's an inner-most loop
2544 - number of BBs = 2 (which are the loop header and the latch)
2545 - the loop has a pre-header
2546 - the loop has a single entry and exit
2547 - the loop exit condition is simple enough, and the number of iterations
2548 can be analyzed (a countable loop). */
2550 static loop_vec_info
2551 vect_analyze_loop_form (struct loop *loop)
2553 loop_vec_info loop_vinfo;
2555 tree number_of_iterations = NULL;
2557 if (vect_print_dump_info (REPORT_DETAILS))
2558 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2562 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2563 fprintf (vect_dump, "not vectorized: nested loop.");
2567 if (!single_exit (loop)
2568 || loop->num_nodes != 2
2569 || EDGE_COUNT (loop->header->preds) != 2)
2571 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2573 if (!single_exit (loop))
2574 fprintf (vect_dump, "not vectorized: multiple exits.");
2575 else if (loop->num_nodes != 2)
2576 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2577 else if (EDGE_COUNT (loop->header->preds) != 2)
2578 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2584 /* We assume that the loop exit condition is at the end of the loop. i.e,
2585 that the loop is represented as a do-while (with a proper if-guard
2586 before the loop if needed), where the loop header contains all the
2587 executable statements, and the latch is empty. */
2588 if (!empty_block_p (loop->latch)
2589 || phi_nodes (loop->latch))
2591 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2592 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2596 /* Make sure there exists a single-predecessor exit bb: */
2597 if (!single_pred_p (single_exit (loop)->dest))
2599 edge e = single_exit (loop);
2600 if (!(e->flags & EDGE_ABNORMAL))
2602 split_loop_exit_edge (e);
2603 if (vect_print_dump_info (REPORT_DETAILS))
2604 fprintf (vect_dump, "split exit edge.");
2608 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2609 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2614 if (empty_block_p (loop->header))
2616 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2617 fprintf (vect_dump, "not vectorized: empty loop.");
2621 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2624 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2625 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2629 if (!number_of_iterations)
2631 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2633 "not vectorized: number of iterations cannot be computed.");
2637 if (chrec_contains_undetermined (number_of_iterations))
2639 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2640 fprintf (vect_dump, "Infinite number of iterations.");
2644 if (!NITERS_KNOWN_P (number_of_iterations))
2646 if (vect_print_dump_info (REPORT_DETAILS))
2648 fprintf (vect_dump, "Symbolic number of iterations is ");
2649 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2652 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
2654 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2655 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2659 loop_vinfo = new_loop_vec_info (loop);
2660 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2661 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2663 gcc_assert (!loop->aux);
2664 loop->aux = loop_vinfo;
2669 /* Function vect_analyze_loop.
2671 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2672 for it. The different analyses will record information in the
2673 loop_vec_info struct. */
2675 vect_analyze_loop (struct loop *loop)
2678 loop_vec_info loop_vinfo;
2680 if (vect_print_dump_info (REPORT_DETAILS))
2681 fprintf (vect_dump, "===== analyze_loop_nest =====");
2683 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2685 loop_vinfo = vect_analyze_loop_form (loop);
2688 if (vect_print_dump_info (REPORT_DETAILS))
2689 fprintf (vect_dump, "bad loop form.");
2693 /* Find all data references in the loop (which correspond to vdefs/vuses)
2694 and analyze their evolution in the loop.
2696 FORNOW: Handle only simple, array references, which
2697 alignment can be forced, and aligned pointer-references. */
2699 ok = vect_analyze_data_refs (loop_vinfo);
2702 if (vect_print_dump_info (REPORT_DETAILS))
2703 fprintf (vect_dump, "bad data references.");
2704 destroy_loop_vec_info (loop_vinfo);
2708 /* Classify all cross-iteration scalar data-flow cycles.
2709 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2711 vect_analyze_scalar_cycles (loop_vinfo);
2713 vect_pattern_recog (loop_vinfo);
2715 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2717 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2720 if (vect_print_dump_info (REPORT_DETAILS))
2721 fprintf (vect_dump, "unexpected pattern.");
2722 destroy_loop_vec_info (loop_vinfo);
2726 /* Analyze the alignment of the data-refs in the loop.
2727 Fail if a data reference is found that cannot be vectorized. */
2729 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2732 if (vect_print_dump_info (REPORT_DETAILS))
2733 fprintf (vect_dump, "bad data alignment.");
2734 destroy_loop_vec_info (loop_vinfo);
2738 ok = vect_determine_vectorization_factor (loop_vinfo);
2741 if (vect_print_dump_info (REPORT_DETAILS))
2742 fprintf (vect_dump, "can't determine vectorization factor.");
2743 destroy_loop_vec_info (loop_vinfo);
2747 /* Analyze data dependences between the data-refs in the loop.
2748 FORNOW: fail at the first data dependence that we encounter. */
2750 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2753 if (vect_print_dump_info (REPORT_DETAILS))
2754 fprintf (vect_dump, "bad data dependence.");
2755 destroy_loop_vec_info (loop_vinfo);
2759 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2760 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2762 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2765 if (vect_print_dump_info (REPORT_DETAILS))
2766 fprintf (vect_dump, "bad data access.");
2767 destroy_loop_vec_info (loop_vinfo);
2771 /* This pass will decide on using loop versioning and/or loop peeling in
2772 order to enhance the alignment of data references in the loop. */
2774 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2777 if (vect_print_dump_info (REPORT_DETAILS))
2778 fprintf (vect_dump, "bad data alignment.");
2779 destroy_loop_vec_info (loop_vinfo);
2783 /* Scan all the operations in the loop and make sure they are
2786 ok = vect_analyze_operations (loop_vinfo);
2789 if (vect_print_dump_info (REPORT_DETAILS))
2790 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2791 destroy_loop_vec_info (loop_vinfo);
2795 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;