1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
104 stmt_vec_info stmt_info;
107 if (vect_print_dump_info (REPORT_DETAILS))
108 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
110 for (i = 0; i < nbbs; i++)
112 basic_block bb = bbs[i];
114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
116 stmt_info = vinfo_for_stmt (phi);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining phi: ");
120 print_generic_expr (vect_dump, phi, TDF_SLIM);
123 gcc_assert (stmt_info);
125 if (STMT_VINFO_RELEVANT_P (stmt_info))
127 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
128 scalar_type = TREE_TYPE (PHI_RESULT (phi));
130 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "get vectype for scalar type: ");
133 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
136 vectype = get_vectype_for_scalar_type (scalar_type);
139 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 "not vectorized: unsupported data-type ");
143 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
147 STMT_VINFO_VECTYPE (stmt_info) = vectype;
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "vectype: ");
152 print_generic_expr (vect_dump, vectype, TDF_SLIM);
155 nunits = TYPE_VECTOR_SUBPARTS (vectype);
156 if (vect_print_dump_info (REPORT_DETAILS))
157 fprintf (vect_dump, "nunits = %d", nunits);
159 if (!vectorization_factor
160 || (nunits > vectorization_factor))
161 vectorization_factor = nunits;
165 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
167 tree stmt = bsi_stmt (si);
168 stmt_info = vinfo_for_stmt (stmt);
170 if (vect_print_dump_info (REPORT_DETAILS))
172 fprintf (vect_dump, "==> examining statement: ");
173 print_generic_expr (vect_dump, stmt, TDF_SLIM);
176 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
179 gcc_assert (stmt_info);
181 /* skip stmts which do not need to be vectorized. */
182 if (!STMT_VINFO_RELEVANT_P (stmt_info)
183 && !STMT_VINFO_LIVE_P (stmt_info))
185 if (vect_print_dump_info (REPORT_DETAILS))
186 fprintf (vect_dump, "skip.");
190 if (!GIMPLE_STMT_P (stmt)
191 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
193 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
195 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
196 print_generic_expr (vect_dump, stmt, TDF_SLIM);
201 if (STMT_VINFO_VECTYPE (stmt_info))
203 /* The only case when a vectype had been already set is for stmts
204 that contain a dataref, or for "pattern-stmts" (stmts generated
205 by the vectorizer to represent/replace a certain idiom). */
206 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
207 || is_pattern_stmt_p (stmt_info));
208 vectype = STMT_VINFO_VECTYPE (stmt_info);
212 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
213 && !is_pattern_stmt_p (stmt_info));
215 /* We set the vectype according to the type of the result (lhs).
216 For stmts whose result-type is different than the type of the
217 arguments (e.g. demotion, promotion), vectype will be reset
218 appropriately (later). Note that we have to visit the smallest
219 datatype in this function, because that determines the VF.
220 If the smallest datatype in the loop is present only as the
221 rhs of a promotion operation - we'd miss it here.
222 However, in such a case, that a variable of this datatype
223 does not appear in the lhs anywhere in the loop, it shouldn't
224 affect the vectorization factor. */
225 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
227 if (vect_print_dump_info (REPORT_DETAILS))
229 fprintf (vect_dump, "get vectype for scalar type: ");
230 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
233 vectype = get_vectype_for_scalar_type (scalar_type);
236 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
239 "not vectorized: unsupported data-type ");
240 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
244 STMT_VINFO_VECTYPE (stmt_info) = vectype;
247 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vectype: ");
250 print_generic_expr (vect_dump, vectype, TDF_SLIM);
253 nunits = TYPE_VECTOR_SUBPARTS (vectype);
254 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "nunits = %d", nunits);
257 if (!vectorization_factor
258 || (nunits > vectorization_factor))
259 vectorization_factor = nunits;
264 /* TODO: Analyze cost. Decide if worth while to vectorize. */
265 if (vect_print_dump_info (REPORT_DETAILS))
266 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
267 if (vectorization_factor <= 1)
269 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
270 fprintf (vect_dump, "not vectorized: unsupported data-type");
273 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
279 /* Function vect_analyze_operations.
281 Scan the loop stmts and make sure they are all vectorizable. */
284 vect_analyze_operations (loop_vec_info loop_vinfo)
286 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
287 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
288 int nbbs = loop->num_nodes;
289 block_stmt_iterator si;
290 unsigned int vectorization_factor = 0;
294 stmt_vec_info stmt_info;
295 bool need_to_vectorize = false;
297 if (vect_print_dump_info (REPORT_DETAILS))
298 fprintf (vect_dump, "=== vect_analyze_operations ===");
300 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
301 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
303 for (i = 0; i < nbbs; i++)
305 basic_block bb = bbs[i];
307 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
311 stmt_info = vinfo_for_stmt (phi);
312 if (vect_print_dump_info (REPORT_DETAILS))
314 fprintf (vect_dump, "examining phi: ");
315 print_generic_expr (vect_dump, phi, TDF_SLIM);
318 gcc_assert (stmt_info);
320 if (STMT_VINFO_LIVE_P (stmt_info))
322 /* FORNOW: not yet supported. */
323 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
324 fprintf (vect_dump, "not vectorized: value used after loop.");
328 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
329 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
331 /* A scalar-dependence cycle that we don't support. */
332 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
333 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
337 if (STMT_VINFO_RELEVANT_P (stmt_info))
339 need_to_vectorize = true;
340 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
341 ok = vectorizable_induction (phi, NULL, NULL);
346 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
349 "not vectorized: relevant phi not supported: ");
350 print_generic_expr (vect_dump, phi, TDF_SLIM);
356 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
358 tree stmt = bsi_stmt (si);
359 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
360 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
362 if (vect_print_dump_info (REPORT_DETAILS))
364 fprintf (vect_dump, "==> examining statement: ");
365 print_generic_expr (vect_dump, stmt, TDF_SLIM);
368 gcc_assert (stmt_info);
370 /* skip stmts which do not need to be vectorized.
371 this is expected to include:
372 - the COND_EXPR which is the loop exit condition
373 - any LABEL_EXPRs in the loop
374 - computations that are used only for array indexing or loop
377 if (!STMT_VINFO_RELEVANT_P (stmt_info)
378 && !STMT_VINFO_LIVE_P (stmt_info))
380 if (vect_print_dump_info (REPORT_DETAILS))
381 fprintf (vect_dump, "irrelevant.");
385 switch (STMT_VINFO_DEF_TYPE (stmt_info))
390 case vect_reduction_def:
391 gcc_assert (relevance == vect_unused_in_loop);
394 case vect_induction_def:
395 case vect_constant_def:
396 case vect_invariant_def:
397 case vect_unknown_def_type:
402 if (STMT_VINFO_RELEVANT_P (stmt_info))
404 gcc_assert (GIMPLE_STMT_P (stmt)
405 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
406 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
407 need_to_vectorize = true;
410 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
411 || vectorizable_type_demotion (stmt, NULL, NULL)
412 || vectorizable_conversion (stmt, NULL, NULL)
413 || vectorizable_operation (stmt, NULL, NULL)
414 || vectorizable_assignment (stmt, NULL, NULL)
415 || vectorizable_load (stmt, NULL, NULL)
416 || vectorizable_call (stmt, NULL, NULL)
417 || vectorizable_store (stmt, NULL, NULL)
418 || vectorizable_condition (stmt, NULL, NULL)
419 || vectorizable_reduction (stmt, NULL, NULL));
421 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
422 need extra handling, except for vectorizable reductions. */
423 if (STMT_VINFO_LIVE_P (stmt_info)
424 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
425 ok |= vectorizable_live_operation (stmt, NULL, NULL);
429 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
431 fprintf (vect_dump, "not vectorized: stmt not supported: ");
432 print_generic_expr (vect_dump, stmt, TDF_SLIM);
439 /* TODO: Analyze cost. Decide if worth while to vectorize. */
441 /* All operations in the loop are either irrelevant (deal with loop
442 control, or dead), or only used outside the loop and can be moved
443 out of the loop (e.g. invariants, inductions). The loop can be
444 optimized away by scalar optimizations. We're better off not
445 touching this loop. */
446 if (!need_to_vectorize)
448 if (vect_print_dump_info (REPORT_DETAILS))
450 "All the computation can be taken out of the loop.");
451 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
453 "not vectorized: redundant loop. no profit to vectorize.");
457 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
458 && vect_print_dump_info (REPORT_DETAILS))
460 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
461 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
463 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
464 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
465 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
466 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
467 * vectorization_factor))))
469 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
470 fprintf (vect_dump, "not vectorized: iteration count too small.");
474 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
475 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
476 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
478 if (vect_print_dump_info (REPORT_DETAILS))
479 fprintf (vect_dump, "epilog loop required.");
480 if (!vect_can_advance_ivs_p (loop_vinfo))
482 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
484 "not vectorized: can't create epilog loop 1.");
487 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
489 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
491 "not vectorized: can't create epilog loop 2.");
500 /* Function exist_non_indexing_operands_for_use_p
502 USE is one of the uses attached to STMT. Check if USE is
503 used in STMT for anything other than indexing an array. */
506 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
509 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
511 /* USE corresponds to some operand in STMT. If there is no data
512 reference in STMT, then any operand that corresponds to USE
513 is not indexing an array. */
514 if (!STMT_VINFO_DATA_REF (stmt_info))
517 /* STMT has a data_ref. FORNOW this means that its of one of
521 (This should have been verified in analyze_data_refs).
523 'var' in the second case corresponds to a def, not a use,
524 so USE cannot correspond to any operands that are not used
527 Therefore, all we need to check is if STMT falls into the
528 first case, and whether var corresponds to USE. */
530 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
533 operand = GIMPLE_STMT_OPERAND (stmt, 1);
535 if (TREE_CODE (operand) != SSA_NAME)
545 /* Function vect_analyze_scalar_cycles.
547 Examine the cross iteration def-use cycles of scalar variables, by
548 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
549 following: invariant, induction, reduction, unknown.
551 Some forms of scalar cycles are not yet supported.
553 Example1: reduction: (unsupported yet)
559 Example2: induction: (unsupported yet)
565 Note: the following loop *is* vectorizable:
571 even though it has a def-use cycle caused by the induction variable i:
573 loop: i_2 = PHI (i_0, i_1)
578 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
579 it does not need to be vectorized because it is only used for array
580 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
581 loop2 on the other hand is relevant (it is being written to memory).
585 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
588 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
589 basic_block bb = loop->header;
591 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
593 if (vect_print_dump_info (REPORT_DETAILS))
594 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
596 /* First - identify all inductions. */
597 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
599 tree access_fn = NULL;
600 tree def = PHI_RESULT (phi);
601 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
603 if (vect_print_dump_info (REPORT_DETAILS))
605 fprintf (vect_dump, "Analyze phi: ");
606 print_generic_expr (vect_dump, phi, TDF_SLIM);
609 /* Skip virtual phi's. The data dependences that are associated with
610 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
611 if (!is_gimple_reg (SSA_NAME_VAR (def)))
614 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
616 /* Analyze the evolution function. */
617 access_fn = analyze_scalar_evolution (loop, def);
618 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
620 fprintf (vect_dump, "Access function of PHI: ");
621 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
625 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
627 VEC_safe_push (tree, heap, worklist, phi);
631 if (vect_print_dump_info (REPORT_DETAILS))
632 fprintf (vect_dump, "Detected induction.");
633 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
637 /* Second - identify all reductions. */
638 while (VEC_length (tree, worklist) > 0)
640 tree phi = VEC_pop (tree, worklist);
641 tree def = PHI_RESULT (phi);
642 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
645 if (vect_print_dump_info (REPORT_DETAILS))
647 fprintf (vect_dump, "Analyze phi: ");
648 print_generic_expr (vect_dump, phi, TDF_SLIM);
651 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
652 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
654 reduc_stmt = vect_is_simple_reduction (loop, phi);
657 if (vect_print_dump_info (REPORT_DETAILS))
658 fprintf (vect_dump, "Detected reduction.");
659 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
660 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
664 if (vect_print_dump_info (REPORT_DETAILS))
665 fprintf (vect_dump, "Unknown def-use cycle pattern.");
668 VEC_free (tree, heap, worklist);
673 /* Function vect_insert_into_interleaving_chain.
675 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
678 vect_insert_into_interleaving_chain (struct data_reference *dra,
679 struct data_reference *drb)
681 tree prev, next, next_init;
682 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
683 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
685 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
686 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
689 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
690 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
693 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
694 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
698 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
701 /* We got to the end of the list. Insert here. */
702 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
703 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
707 /* Function vect_update_interleaving_chain.
709 For two data-refs DRA and DRB that are a part of a chain interleaved data
710 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
712 There are four possible cases:
713 1. New stmts - both DRA and DRB are not a part of any chain:
716 2. DRB is a part of a chain and DRA is not:
717 no need to update FIRST_DR
718 no need to insert DRB
719 insert DRA according to init
720 3. DRA is a part of a chain and DRB is not:
721 if (init of FIRST_DR > init of DRB)
723 NEXT(FIRST_DR) = previous FIRST_DR
725 insert DRB according to its init
726 4. both DRA and DRB are in some interleaving chains:
727 choose the chain with the smallest init of FIRST_DR
728 insert the nodes of the second chain into the first one. */
731 vect_update_interleaving_chain (struct data_reference *drb,
732 struct data_reference *dra)
734 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
735 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
736 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
737 tree node, prev, next, node_init, first_stmt;
739 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
740 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
742 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
743 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
744 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
748 /* 2. DRB is a part of a chain and DRA is not. */
749 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
751 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
752 /* Insert DRA into the chain of DRB. */
753 vect_insert_into_interleaving_chain (dra, drb);
757 /* 3. DRA is a part of a chain and DRB is not. */
758 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
760 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
761 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
765 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
767 /* DRB's init is smaller than the init of the stmt previously marked
768 as the first stmt of the interleaving chain of DRA. Therefore, we
769 update FIRST_STMT and put DRB in the head of the list. */
770 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
771 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
773 /* Update all the stmts in the list to point to the new FIRST_STMT. */
774 tmp = old_first_stmt;
777 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
778 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
783 /* Insert DRB in the list of DRA. */
784 vect_insert_into_interleaving_chain (drb, dra);
785 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
790 /* 4. both DRA and DRB are in some interleaving chains. */
791 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
792 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
793 if (first_a == first_b)
795 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
796 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
798 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
800 /* Insert the nodes of DRA chain into the DRB chain.
801 After inserting a node, continue from this node of the DRB chain (don't
802 start from the beginning. */
803 node = DR_GROUP_FIRST_DR (stmtinfo_a);
804 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
805 first_stmt = first_b;
809 /* Insert the nodes of DRB chain into the DRA chain.
810 After inserting a node, continue from this node of the DRA chain (don't
811 start from the beginning. */
812 node = DR_GROUP_FIRST_DR (stmtinfo_b);
813 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
814 first_stmt = first_a;
819 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
820 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
823 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
824 if (tree_int_cst_compare (next_init, node_init) > 0)
827 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
828 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
833 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
837 /* We got to the end of the list. Insert here. */
838 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
839 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
842 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
843 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
848 /* Function vect_equal_offsets.
850 Check if OFFSET1 and OFFSET2 are identical expressions. */
853 vect_equal_offsets (tree offset1, tree offset2)
857 STRIP_NOPS (offset1);
858 STRIP_NOPS (offset2);
860 if (offset1 == offset2)
863 if (TREE_CODE (offset1) != TREE_CODE (offset2)
864 || !BINARY_CLASS_P (offset1)
865 || !BINARY_CLASS_P (offset2))
868 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
869 TREE_OPERAND (offset2, 0));
870 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
871 TREE_OPERAND (offset2, 1));
873 return (res0 && res1);
877 /* Function vect_check_interleaving.
879 Check if DRA and DRB are a part of interleaving. In case they are, insert
880 DRA and DRB in an interleaving chain. */
883 vect_check_interleaving (struct data_reference *dra,
884 struct data_reference *drb)
886 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
888 /* Check that the data-refs have same first location (except init) and they
889 are both either store or load (not load and store). */
890 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
891 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
892 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
893 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
894 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
895 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
896 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
897 || DR_IS_READ (dra) != DR_IS_READ (drb))
901 1. data-refs are of the same type
902 2. their steps are equal
903 3. the step is greater than the difference between data-refs' inits */
904 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
905 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
907 if (type_size_a != type_size_b
908 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
911 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
912 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
913 step = TREE_INT_CST_LOW (DR_STEP (dra));
917 /* If init_a == init_b + the size of the type * k, we have an interleaving,
918 and DRB is accessed before DRA. */
919 diff_mod_size = (init_a - init_b) % type_size_a;
921 if ((init_a - init_b) > step)
924 if (diff_mod_size == 0)
926 vect_update_interleaving_chain (drb, dra);
927 if (vect_print_dump_info (REPORT_DR_DETAILS))
929 fprintf (vect_dump, "Detected interleaving ");
930 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
931 fprintf (vect_dump, " and ");
932 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
939 /* If init_b == init_a + the size of the type * k, we have an
940 interleaving, and DRA is accessed before DRB. */
941 diff_mod_size = (init_b - init_a) % type_size_a;
943 if ((init_b - init_a) > step)
946 if (diff_mod_size == 0)
948 vect_update_interleaving_chain (dra, drb);
949 if (vect_print_dump_info (REPORT_DR_DETAILS))
951 fprintf (vect_dump, "Detected interleaving ");
952 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
953 fprintf (vect_dump, " and ");
954 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
962 /* Function vect_analyze_data_ref_dependence.
964 Return TRUE if there (might) exist a dependence between a memory-reference
965 DRA and a memory-reference DRB. */
968 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
969 loop_vec_info loop_vinfo)
972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
973 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
974 struct data_reference *dra = DDR_A (ddr);
975 struct data_reference *drb = DDR_B (ddr);
976 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
977 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
978 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
979 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
980 lambda_vector dist_v;
981 unsigned int loop_depth;
983 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
985 /* Independent data accesses. */
986 vect_check_interleaving (dra, drb);
990 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
993 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
995 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
998 "not vectorized: can't determine dependence between ");
999 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1000 fprintf (vect_dump, " and ");
1001 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1006 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1008 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1010 fprintf (vect_dump, "not vectorized: bad dist vector for ");
1011 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1012 fprintf (vect_dump, " and ");
1013 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1018 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1019 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1021 int dist = dist_v[loop_depth];
1023 if (vect_print_dump_info (REPORT_DR_DETAILS))
1024 fprintf (vect_dump, "dependence distance = %d.", dist);
1026 /* Same loop iteration. */
1027 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1029 /* Two references with distance zero have the same alignment. */
1030 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1031 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1032 if (vect_print_dump_info (REPORT_ALIGNMENT))
1033 fprintf (vect_dump, "accesses have the same alignment.");
1034 if (vect_print_dump_info (REPORT_DR_DETAILS))
1036 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1037 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1038 fprintf (vect_dump, " and ");
1039 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1042 /* For interleaving, mark that there is a read-write dependency if
1043 necessary. We check before that one of the data-refs is store. */
1044 if (DR_IS_READ (dra))
1045 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1048 if (DR_IS_READ (drb))
1049 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1055 if (abs (dist) >= vectorization_factor)
1057 /* Dependence distance does not create dependence, as far as vectorization
1058 is concerned, in this case. */
1059 if (vect_print_dump_info (REPORT_DR_DETAILS))
1060 fprintf (vect_dump, "dependence distance >= VF.");
1064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1067 "not vectorized: possible dependence between data-refs ");
1068 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1069 fprintf (vect_dump, " and ");
1070 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1080 /* Function vect_analyze_data_ref_dependences.
1082 Examine all the data references in the loop, and make sure there do not
1083 exist any data dependences between them. */
1086 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1089 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1090 struct data_dependence_relation *ddr;
1092 if (vect_print_dump_info (REPORT_DETAILS))
1093 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1095 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1096 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1103 /* Function vect_compute_data_ref_alignment
1105 Compute the misalignment of the data reference DR.
1108 1. If during the misalignment computation it is found that the data reference
1109 cannot be vectorized then false is returned.
1110 2. DR_MISALIGNMENT (DR) is defined.
1112 FOR NOW: No analysis is actually performed. Misalignment is calculated
1113 only for trivial cases. TODO. */
1116 vect_compute_data_ref_alignment (struct data_reference *dr)
1118 tree stmt = DR_STMT (dr);
1119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1120 tree ref = DR_REF (dr);
1122 tree base, base_addr;
1125 tree aligned_to, alignment;
1127 if (vect_print_dump_info (REPORT_DETAILS))
1128 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1130 /* Initialize misalignment to unknown. */
1131 DR_MISALIGNMENT (dr) = -1;
1133 misalign = DR_OFFSET_MISALIGNMENT (dr);
1134 aligned_to = DR_ALIGNED_TO (dr);
1135 base_addr = DR_BASE_ADDRESS (dr);
1136 base = build_fold_indirect_ref (base_addr);
1137 vectype = STMT_VINFO_VECTYPE (stmt_info);
1138 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1140 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1143 if (vect_print_dump_info (REPORT_DETAILS))
1145 fprintf (vect_dump, "Unknown alignment for access: ");
1146 print_generic_expr (vect_dump, base, TDF_SLIM);
1152 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1154 || (TREE_CODE (base_addr) == SSA_NAME
1155 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1156 TREE_TYPE (base_addr)))),
1158 base_aligned = true;
1160 base_aligned = false;
1164 /* Do not change the alignment of global variables if
1165 flag_section_anchors is enabled. */
1166 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1167 || (TREE_STATIC (base) && flag_section_anchors))
1169 if (vect_print_dump_info (REPORT_DETAILS))
1171 fprintf (vect_dump, "can't force alignment of ref: ");
1172 print_generic_expr (vect_dump, ref, TDF_SLIM);
1177 /* Force the alignment of the decl.
1178 NOTE: This is the only change to the code we make during
1179 the analysis phase, before deciding to vectorize the loop. */
1180 if (vect_print_dump_info (REPORT_DETAILS))
1181 fprintf (vect_dump, "force alignment");
1182 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1183 DECL_USER_ALIGN (base) = 1;
1186 /* At this point we assume that the base is aligned. */
1187 gcc_assert (base_aligned
1188 || (TREE_CODE (base) == VAR_DECL
1189 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1191 /* Modulo alignment. */
1192 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1194 if (!host_integerp (misalign, 1))
1196 /* Negative or overflowed misalignment value. */
1197 if (vect_print_dump_info (REPORT_DETAILS))
1198 fprintf (vect_dump, "unexpected misalign value");
1202 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1204 if (vect_print_dump_info (REPORT_DETAILS))
1206 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1207 print_generic_expr (vect_dump, ref, TDF_SLIM);
1214 /* Function vect_compute_data_refs_alignment
1216 Compute the misalignment of data references in the loop.
1217 Return FALSE if a data reference is found that cannot be vectorized. */
1220 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1222 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1223 struct data_reference *dr;
1226 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1227 if (!vect_compute_data_ref_alignment (dr))
1234 /* Function vect_update_misalignment_for_peel
1236 DR - the data reference whose misalignment is to be adjusted.
1237 DR_PEEL - the data reference whose misalignment is being made
1238 zero in the vector loop by the peel.
1239 NPEEL - the number of iterations in the peel loop if the misalignment
1240 of DR_PEEL is known at compile time. */
1243 vect_update_misalignment_for_peel (struct data_reference *dr,
1244 struct data_reference *dr_peel, int npeel)
1247 VEC(dr_p,heap) *same_align_drs;
1248 struct data_reference *current_dr;
1249 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1250 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1251 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1252 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1254 /* For interleaved data accesses the step in the loop must be multiplied by
1255 the size of the interleaving group. */
1256 if (DR_GROUP_FIRST_DR (stmt_info))
1257 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1258 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1259 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1261 /* It can be assumed that the data refs with the same alignment as dr_peel
1262 are aligned in the vector loop. */
1264 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1265 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1267 if (current_dr != dr)
1269 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1270 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1271 DR_MISALIGNMENT (dr) = 0;
1275 if (known_alignment_for_access_p (dr)
1276 && known_alignment_for_access_p (dr_peel))
1278 DR_MISALIGNMENT (dr) += npeel * dr_size;
1279 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1283 if (vect_print_dump_info (REPORT_DETAILS))
1284 fprintf (vect_dump, "Setting misalignment to -1.");
1285 DR_MISALIGNMENT (dr) = -1;
1289 /* Function vect_verify_datarefs_alignment
1291 Return TRUE if all data references in the loop can be
1292 handled with respect to alignment. */
1295 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1297 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1298 struct data_reference *dr;
1299 enum dr_alignment_support supportable_dr_alignment;
1302 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1304 tree stmt = DR_STMT (dr);
1305 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1307 /* For interleaving, only the alignment of the first access matters. */
1308 if (DR_GROUP_FIRST_DR (stmt_info)
1309 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1312 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1313 if (!supportable_dr_alignment)
1315 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1317 if (DR_IS_READ (dr))
1319 "not vectorized: unsupported unaligned load.");
1322 "not vectorized: unsupported unaligned store.");
1326 if (supportable_dr_alignment != dr_aligned
1327 && vect_print_dump_info (REPORT_ALIGNMENT))
1328 fprintf (vect_dump, "Vectorizing an unaligned access.");
1334 /* Function vect_enhance_data_refs_alignment
1336 This pass will use loop versioning and loop peeling in order to enhance
1337 the alignment of data references in the loop.
1339 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1340 original loop is to be vectorized; Any other loops that are created by
1341 the transformations performed in this pass - are not supposed to be
1342 vectorized. This restriction will be relaxed.
1344 This pass will require a cost model to guide it whether to apply peeling
1345 or versioning or a combination of the two. For example, the scheme that
1346 intel uses when given a loop with several memory accesses, is as follows:
1347 choose one memory access ('p') which alignment you want to force by doing
1348 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1349 other accesses are not necessarily aligned, or (2) use loop versioning to
1350 generate one loop in which all accesses are aligned, and another loop in
1351 which only 'p' is necessarily aligned.
1353 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1354 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1355 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1357 Devising a cost model is the most critical aspect of this work. It will
1358 guide us on which access to peel for, whether to use loop versioning, how
1359 many versions to create, etc. The cost model will probably consist of
1360 generic considerations as well as target specific considerations (on
1361 powerpc for example, misaligned stores are more painful than misaligned
1364 Here are the general steps involved in alignment enhancements:
1366 -- original loop, before alignment analysis:
1367 for (i=0; i<N; i++){
1368 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1369 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1372 -- After vect_compute_data_refs_alignment:
1373 for (i=0; i<N; i++){
1374 x = q[i]; # DR_MISALIGNMENT(q) = 3
1375 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1378 -- Possibility 1: we do loop versioning:
1380 for (i=0; i<N; i++){ # loop 1A
1381 x = q[i]; # DR_MISALIGNMENT(q) = 3
1382 p[i] = y; # DR_MISALIGNMENT(p) = 0
1386 for (i=0; i<N; i++){ # loop 1B
1387 x = q[i]; # DR_MISALIGNMENT(q) = 3
1388 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1392 -- Possibility 2: we do loop peeling:
1393 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1397 for (i = 3; i < N; i++){ # loop 2A
1398 x = q[i]; # DR_MISALIGNMENT(q) = 0
1399 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1402 -- Possibility 3: combination of loop peeling and versioning:
1403 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1408 for (i = 3; i<N; i++){ # loop 3A
1409 x = q[i]; # DR_MISALIGNMENT(q) = 0
1410 p[i] = y; # DR_MISALIGNMENT(p) = 0
1414 for (i = 3; i<N; i++){ # loop 3B
1415 x = q[i]; # DR_MISALIGNMENT(q) = 0
1416 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1420 These loops are later passed to loop_transform to be vectorized. The
1421 vectorizer will use the alignment information to guide the transformation
1422 (whether to generate regular loads/stores, or with special handling for
1426 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1428 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1429 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1430 enum dr_alignment_support supportable_dr_alignment;
1431 struct data_reference *dr0 = NULL;
1432 struct data_reference *dr;
1434 bool do_peeling = false;
1435 bool do_versioning = false;
1438 stmt_vec_info stmt_info;
1440 if (vect_print_dump_info (REPORT_DETAILS))
1441 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1443 /* While cost model enhancements are expected in the future, the high level
1444 view of the code at this time is as follows:
1446 A) If there is a misaligned write then see if peeling to align this write
1447 can make all data references satisfy vect_supportable_dr_alignment.
1448 If so, update data structures as needed and return true. Note that
1449 at this time vect_supportable_dr_alignment is known to return false
1450 for a misaligned write.
1452 B) If peeling wasn't possible and there is a data reference with an
1453 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1454 then see if loop versioning checks can be used to make all data
1455 references satisfy vect_supportable_dr_alignment. If so, update
1456 data structures as needed and return true.
1458 C) If neither peeling nor versioning were successful then return false if
1459 any data reference does not satisfy vect_supportable_dr_alignment.
1461 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1463 Note, Possibility 3 above (which is peeling and versioning together) is not
1464 being done at this time. */
1466 /* (1) Peeling to force alignment. */
1468 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1470 + How many accesses will become aligned due to the peeling
1471 - How many accesses will become unaligned due to the peeling,
1472 and the cost of misaligned accesses.
1473 - The cost of peeling (the extra runtime checks, the increase
1476 The scheme we use FORNOW: peel to force the alignment of the first
1477 misaligned store in the loop.
1478 Rationale: misaligned stores are not yet supported.
1480 TODO: Use a cost model. */
1482 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1484 stmt = DR_STMT (dr);
1485 stmt_info = vinfo_for_stmt (stmt);
1487 /* For interleaving, only the alignment of the first access
1489 if (DR_GROUP_FIRST_DR (stmt_info)
1490 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1493 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1495 if (DR_GROUP_FIRST_DR (stmt_info))
1497 /* For interleaved access we peel only if number of iterations in
1498 the prolog loop ({VF - misalignment}), is a multiple of the
1499 number of the interleaved accesses. */
1500 int elem_size, mis_in_elements;
1501 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1502 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1504 /* FORNOW: handle only known alignment. */
1505 if (!known_alignment_for_access_p (dr))
1511 elem_size = UNITS_PER_SIMD_WORD / nelements;
1512 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1514 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1526 /* Often peeling for alignment will require peeling for loop-bound, which in
1527 turn requires that we know how to adjust the loop ivs after the loop. */
1528 if (!vect_can_advance_ivs_p (loop_vinfo)
1529 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1536 tree stmt = DR_STMT (dr0);
1537 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1538 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1539 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1541 if (known_alignment_for_access_p (dr0))
1543 /* Since it's known at compile time, compute the number of iterations
1544 in the peeled loop (the peeling factor) for use in updating
1545 DR_MISALIGNMENT values. The peeling factor is the vectorization
1546 factor minus the misalignment as an element count. */
1547 mis = DR_MISALIGNMENT (dr0);
1548 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1549 npeel = nelements - mis;
1551 /* For interleaved data access every iteration accesses all the
1552 members of the group, therefore we divide the number of iterations
1553 by the group size. */
1554 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1555 if (DR_GROUP_FIRST_DR (stmt_info))
1556 npeel /= DR_GROUP_SIZE (stmt_info);
1558 if (vect_print_dump_info (REPORT_DETAILS))
1559 fprintf (vect_dump, "Try peeling by %d", npeel);
1562 /* Ensure that all data refs can be vectorized after the peel. */
1563 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1565 int save_misalignment;
1570 stmt = DR_STMT (dr);
1571 stmt_info = vinfo_for_stmt (stmt);
1572 /* For interleaving, only the alignment of the first access
1574 if (DR_GROUP_FIRST_DR (stmt_info)
1575 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1578 save_misalignment = DR_MISALIGNMENT (dr);
1579 vect_update_misalignment_for_peel (dr, dr0, npeel);
1580 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1581 DR_MISALIGNMENT (dr) = save_misalignment;
1583 if (!supportable_dr_alignment)
1592 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1593 If the misalignment of DR_i is identical to that of dr0 then set
1594 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1595 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1596 by the peeling factor times the element size of DR_i (MOD the
1597 vectorization factor times the size). Otherwise, the
1598 misalignment of DR_i must be set to unknown. */
1599 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1601 vect_update_misalignment_for_peel (dr, dr0, npeel);
1603 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1604 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1605 DR_MISALIGNMENT (dr0) = 0;
1606 if (vect_print_dump_info (REPORT_ALIGNMENT))
1607 fprintf (vect_dump, "Alignment of access forced using peeling.");
1609 if (vect_print_dump_info (REPORT_DETAILS))
1610 fprintf (vect_dump, "Peeling for alignment will be applied.");
1612 stat = vect_verify_datarefs_alignment (loop_vinfo);
1619 /* (2) Versioning to force alignment. */
1621 /* Try versioning if:
1622 1) flag_tree_vect_loop_version is TRUE
1623 2) optimize_size is FALSE
1624 3) there is at least one unsupported misaligned data ref with an unknown
1626 4) all misaligned data refs with a known misalignment are supported, and
1627 5) the number of runtime alignment checks is within reason. */
1629 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1633 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1635 stmt = DR_STMT (dr);
1636 stmt_info = vinfo_for_stmt (stmt);
1638 /* For interleaving, only the alignment of the first access
1640 if (aligned_access_p (dr)
1641 || (DR_GROUP_FIRST_DR (stmt_info)
1642 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1645 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1647 if (!supportable_dr_alignment)
1653 if (known_alignment_for_access_p (dr)
1654 || VEC_length (tree,
1655 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1656 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1658 do_versioning = false;
1662 stmt = DR_STMT (dr);
1663 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1664 gcc_assert (vectype);
1666 /* The rightmost bits of an aligned address must be zeros.
1667 Construct the mask needed for this test. For example,
1668 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1669 mask must be 15 = 0xf. */
1670 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1672 /* FORNOW: use the same mask to test all potentially unaligned
1673 references in the loop. The vectorizer currently supports
1674 a single vector size, see the reference to
1675 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1676 vectorization factor is computed. */
1677 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1678 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1679 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1680 VEC_safe_push (tree, heap,
1681 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1686 /* Versioning requires at least one misaligned data reference. */
1687 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1688 do_versioning = false;
1689 else if (!do_versioning)
1690 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1695 VEC(tree,heap) *may_misalign_stmts
1696 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1699 /* It can now be assumed that the data references in the statements
1700 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1701 of the loop being vectorized. */
1702 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1704 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1705 dr = STMT_VINFO_DATA_REF (stmt_info);
1706 DR_MISALIGNMENT (dr) = 0;
1707 if (vect_print_dump_info (REPORT_ALIGNMENT))
1708 fprintf (vect_dump, "Alignment of access forced using versioning.");
1711 if (vect_print_dump_info (REPORT_DETAILS))
1712 fprintf (vect_dump, "Versioning for alignment will be applied.");
1714 /* Peeling and versioning can't be done together at this time. */
1715 gcc_assert (! (do_peeling && do_versioning));
1717 stat = vect_verify_datarefs_alignment (loop_vinfo);
1722 /* This point is reached if neither peeling nor versioning is being done. */
1723 gcc_assert (! (do_peeling || do_versioning));
1725 stat = vect_verify_datarefs_alignment (loop_vinfo);
1730 /* Function vect_analyze_data_refs_alignment
1732 Analyze the alignment of the data-references in the loop.
1733 Return FALSE if a data reference is found that cannot be vectorized. */
1736 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1738 if (vect_print_dump_info (REPORT_DETAILS))
1739 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1741 if (!vect_compute_data_refs_alignment (loop_vinfo))
1743 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1745 "not vectorized: can't calculate alignment for data ref.");
1753 /* Function vect_analyze_data_ref_access.
1755 Analyze the access pattern of the data-reference DR. For now, a data access
1756 has to be consecutive to be considered vectorizable. */
1759 vect_analyze_data_ref_access (struct data_reference *dr)
1761 tree step = DR_STEP (dr);
1762 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1763 tree scalar_type = TREE_TYPE (DR_REF (dr));
1764 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1765 tree stmt = DR_STMT (dr);
1766 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1767 interleaving group (including gaps). */
1768 HOST_WIDE_INT stride = dr_step / type_size;
1772 if (vect_print_dump_info (REPORT_DETAILS))
1773 fprintf (vect_dump, "bad data-ref access");
1778 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1780 /* Mark that it is not interleaving. */
1781 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1785 /* Not consecutive access is possible only if it is a part of interleaving. */
1786 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1788 /* Check if it this DR is a part of interleaving, and is a single
1789 element of the group that is accessed in the loop. */
1791 /* Gaps are supported only for loads. STEP must be a multiple of the type
1792 size. The size of the group must be a power of 2. */
1794 && (dr_step % type_size) == 0
1796 && exact_log2 (stride) != -1)
1798 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1799 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1800 if (vect_print_dump_info (REPORT_DR_DETAILS))
1802 fprintf (vect_dump, "Detected single element interleaving %d ",
1803 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1804 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1805 fprintf (vect_dump, " step ");
1806 print_generic_expr (vect_dump, step, TDF_SLIM);
1810 if (vect_print_dump_info (REPORT_DETAILS))
1811 fprintf (vect_dump, "not consecutive access");
1815 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1817 /* First stmt in the interleaving chain. Check the chain. */
1818 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1819 struct data_reference *data_ref = dr;
1820 unsigned int count = 1;
1822 tree prev_init = DR_INIT (data_ref);
1824 HOST_WIDE_INT diff, count_in_bytes;
1828 /* Skip same data-refs. In case that two or more stmts share data-ref
1829 (supported only for loads), we vectorize only the first stmt, and
1830 the rest get their vectorized loads from the first one. */
1831 if (!tree_int_cst_compare (DR_INIT (data_ref),
1832 DR_INIT (STMT_VINFO_DATA_REF (
1833 vinfo_for_stmt (next)))))
1835 if (!DR_IS_READ (data_ref))
1837 if (vect_print_dump_info (REPORT_DETAILS))
1838 fprintf (vect_dump, "Two store stmts share the same dr.");
1842 /* Check that there is no load-store dependencies for this loads
1843 to prevent a case of load-store-load to the same location. */
1844 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1845 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1847 if (vect_print_dump_info (REPORT_DETAILS))
1849 "READ_WRITE dependence in interleaving.");
1853 /* For load use the same data-ref load. */
1854 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1857 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1862 /* Check that all the accesses have the same STEP. */
1863 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1864 if (tree_int_cst_compare (step, next_step))
1866 if (vect_print_dump_info (REPORT_DETAILS))
1867 fprintf (vect_dump, "not consecutive access in interleaving");
1871 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1872 /* Check that the distance between two accesses is equal to the type
1873 size. Otherwise, we have gaps. */
1874 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1875 - TREE_INT_CST_LOW (prev_init)) / type_size;
1876 if (!DR_IS_READ (data_ref) && diff != 1)
1878 if (vect_print_dump_info (REPORT_DETAILS))
1879 fprintf (vect_dump, "interleaved store with gaps");
1882 /* Store the gap from the previous member of the group. If there is no
1883 gap in the access, DR_GROUP_GAP is always 1. */
1884 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1886 prev_init = DR_INIT (data_ref);
1887 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1888 /* Count the number of data-refs in the chain. */
1892 /* COUNT is the number of accesses found, we multiply it by the size of
1893 the type to get COUNT_IN_BYTES. */
1894 count_in_bytes = type_size * count;
1896 /* Check that the size of the interleaving is not greater than STEP. */
1897 if (dr_step < count_in_bytes)
1899 if (vect_print_dump_info (REPORT_DETAILS))
1901 fprintf (vect_dump, "interleaving size is greater than step for ");
1902 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1907 /* Check that the size of the interleaving is equal to STEP for stores,
1908 i.e., that there are no gaps. */
1909 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1911 if (vect_print_dump_info (REPORT_DETAILS))
1912 fprintf (vect_dump, "interleaved store with gaps");
1916 /* Check that STEP is a multiple of type size. */
1917 if ((dr_step % type_size) != 0)
1919 if (vect_print_dump_info (REPORT_DETAILS))
1921 fprintf (vect_dump, "step is not a multiple of type size: step ");
1922 print_generic_expr (vect_dump, step, TDF_SLIM);
1923 fprintf (vect_dump, " size ");
1924 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1930 /* FORNOW: we handle only interleaving that is a power of 2. */
1931 if (exact_log2 (stride) == -1)
1933 if (vect_print_dump_info (REPORT_DETAILS))
1934 fprintf (vect_dump, "interleaving is not a power of 2");
1937 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1943 /* Function vect_analyze_data_ref_accesses.
1945 Analyze the access pattern of all the data references in the loop.
1947 FORNOW: the only access pattern that is considered vectorizable is a
1948 simple step 1 (consecutive) access.
1950 FORNOW: handle only arrays and pointer accesses. */
1953 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1956 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1957 struct data_reference *dr;
1959 if (vect_print_dump_info (REPORT_DETAILS))
1960 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1962 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1963 if (!vect_analyze_data_ref_access (dr))
1965 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1966 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1974 /* Function vect_analyze_data_refs.
1976 Find all the data references in the loop.
1978 The general structure of the analysis of data refs in the vectorizer is as
1980 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1981 find and analyze all data-refs in the loop and their dependences.
1982 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1983 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1984 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1989 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1991 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1993 VEC (data_reference_p, heap) *datarefs;
1994 struct data_reference *dr;
1997 if (vect_print_dump_info (REPORT_DETAILS))
1998 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2000 compute_data_dependences_for_loop (loop, true,
2001 &LOOP_VINFO_DATAREFS (loop_vinfo),
2002 &LOOP_VINFO_DDRS (loop_vinfo));
2004 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2005 from stmt_vec_info struct to DR and vectype. */
2006 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2008 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2011 stmt_vec_info stmt_info;
2013 if (!dr || !DR_REF (dr))
2015 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2016 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2020 /* Update DR field in stmt_vec_info struct. */
2021 stmt = DR_STMT (dr);
2022 stmt_info = vinfo_for_stmt (stmt);
2024 if (STMT_VINFO_DATA_REF (stmt_info))
2026 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2029 "not vectorized: more than one data ref in stmt: ");
2030 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2034 STMT_VINFO_DATA_REF (stmt_info) = dr;
2036 /* Check that analysis of the data-ref succeeded. */
2037 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2040 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2042 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2043 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2047 if (!DR_MEMTAG (dr))
2049 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2051 fprintf (vect_dump, "not vectorized: no memory tag for ");
2052 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2057 /* Set vectype for STMT. */
2058 scalar_type = TREE_TYPE (DR_REF (dr));
2059 STMT_VINFO_VECTYPE (stmt_info) =
2060 get_vectype_for_scalar_type (scalar_type);
2061 if (!STMT_VINFO_VECTYPE (stmt_info))
2063 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2066 "not vectorized: no vectype for stmt: ");
2067 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2068 fprintf (vect_dump, " scalar_type: ");
2069 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2079 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2081 /* Function vect_mark_relevant.
2083 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2086 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2087 enum vect_relevant relevant, bool live_p)
2089 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2090 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2091 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2093 if (vect_print_dump_info (REPORT_DETAILS))
2094 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2096 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2100 /* This is the last stmt in a sequence that was detected as a
2101 pattern that can potentially be vectorized. Don't mark the stmt
2102 as relevant/live because it's not going to vectorized.
2103 Instead mark the pattern-stmt that replaces it. */
2104 if (vect_print_dump_info (REPORT_DETAILS))
2105 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2106 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2107 stmt_info = vinfo_for_stmt (pattern_stmt);
2108 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2109 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2110 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2111 stmt = pattern_stmt;
2114 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2115 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2116 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2118 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2119 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2121 if (vect_print_dump_info (REPORT_DETAILS))
2122 fprintf (vect_dump, "already marked relevant/live.");
2126 VEC_safe_push (tree, heap, *worklist, stmt);
2130 /* Function vect_stmt_relevant_p.
2132 Return true if STMT in loop that is represented by LOOP_VINFO is
2133 "relevant for vectorization".
2135 A stmt is considered "relevant for vectorization" if:
2136 - it has uses outside the loop.
2137 - it has vdefs (it alters memory).
2138 - control stmts in the loop (except for the exit condition).
2140 CHECKME: what other side effects would the vectorizer allow? */
2143 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2144 enum vect_relevant *relevant, bool *live_p)
2146 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2147 ssa_op_iter op_iter;
2148 imm_use_iterator imm_iter;
2149 use_operand_p use_p;
2150 def_operand_p def_p;
2152 *relevant = vect_unused_in_loop;
2155 /* cond stmt other than loop exit cond. */
2156 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2157 *relevant = vect_used_in_loop;
2159 /* changing memory. */
2160 if (TREE_CODE (stmt) != PHI_NODE)
2161 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2165 *relevant = vect_used_in_loop;
2168 /* uses outside the loop. */
2169 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2171 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2173 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2174 if (!flow_bb_inside_loop_p (loop, bb))
2176 if (vect_print_dump_info (REPORT_DETAILS))
2177 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2179 /* We expect all such uses to be in the loop exit phis
2180 (because of loop closed form) */
2181 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2182 gcc_assert (bb == single_exit (loop)->dest);
2189 return (*live_p || *relevant);
2194 Function process_use.
2197 - a USE in STMT in a loop represented by LOOP_VINFO
2198 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
2199 that defined USE. This is dont by calling mark_relevant and passing it
2200 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
2203 Generally, LIVE_P and RELEVANT are used to define the liveness and
2204 relevance info of the DEF_STMT of this USE:
2205 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2206 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2208 - case 1: If USE is used only for address computations (e.g. array indexing),
2209 which does not need to be directly vectorized, then the liveness/relevance
2210 of the respective DEF_STMT is left unchanged.
2211 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
2212 skip DEF_STMT cause it had already been processed.
2214 Return true if everyting is as expected. Return false otherwise. */
2217 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
2218 enum vect_relevant relevant, VEC(tree,heap) **worklist)
2220 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2221 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2222 stmt_vec_info dstmt_vinfo;
2225 enum vect_def_type dt;
2227 /* case 1: we are only interested in uses that need to be vectorized. Uses
2228 that are used for address computation are not considered relevant. */
2229 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2232 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2234 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2235 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2239 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2242 def_bb = bb_for_stmt (def_stmt);
2243 if (!flow_bb_inside_loop_p (loop, def_bb))
2246 /* case 2: A reduction phi defining a reduction stmt (DEF_STMT). DEF_STMT
2247 must have already been processed, so we just check that everything is as
2248 expected, and we are done. */
2249 dstmt_vinfo = vinfo_for_stmt (def_stmt);
2250 if (TREE_CODE (stmt) == PHI_NODE
2251 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2252 && TREE_CODE (def_stmt) != PHI_NODE
2253 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def)
2255 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
2256 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
2257 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
2258 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
2259 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
2263 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
2268 /* Function vect_mark_stmts_to_be_vectorized.
2270 Not all stmts in the loop need to be vectorized. For example:
2279 Stmt 1 and 3 do not need to be vectorized, because loop control and
2280 addressing of vectorized data-refs are handled differently.
2282 This pass detects such stmts. */
2285 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2287 VEC(tree,heap) *worklist;
2288 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2289 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2290 unsigned int nbbs = loop->num_nodes;
2291 block_stmt_iterator si;
2295 stmt_vec_info stmt_vinfo;
2299 enum vect_relevant relevant;
2301 if (vect_print_dump_info (REPORT_DETAILS))
2302 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2304 worklist = VEC_alloc (tree, heap, 64);
2306 /* 1. Init worklist. */
2307 for (i = 0; i < nbbs; i++)
2310 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2312 if (vect_print_dump_info (REPORT_DETAILS))
2314 fprintf (vect_dump, "init: phi relevant? ");
2315 print_generic_expr (vect_dump, phi, TDF_SLIM);
2318 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2319 vect_mark_relevant (&worklist, phi, relevant, live_p);
2321 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2323 stmt = bsi_stmt (si);
2324 if (vect_print_dump_info (REPORT_DETAILS))
2326 fprintf (vect_dump, "init: stmt relevant? ");
2327 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2330 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2331 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2335 /* 2. Process_worklist */
2336 while (VEC_length (tree, worklist) > 0)
2338 use_operand_p use_p;
2341 stmt = VEC_pop (tree, worklist);
2342 if (vect_print_dump_info (REPORT_DETAILS))
2344 fprintf (vect_dump, "worklist: examine stmt: ");
2345 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2348 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
2349 (DEF_STMT) as relevant/irrelevant and live/dead according to the
2350 liveness and relevance properties of STMT. */
2351 ann = stmt_ann (stmt);
2352 stmt_vinfo = vinfo_for_stmt (stmt);
2353 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2354 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2356 /* Generally, the liveness and relevance properties of STMT are
2357 propagated as is to the DEF_STMTs of its USEs:
2358 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
2359 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
2361 One exception is when STMT has been identified as defining a reduction
2362 variable; in this case we set the liveness/relevance as follows:
2364 relevant = vect_used_by_reduction
2365 This is because we distinguish between two kinds of relevant stmts -
2366 those that are used by a reduction computation, and those that are
2367 (also) used by a regular computation. This allows us later on to
2368 identify stmts that are used solely by a reduction, and therefore the
2369 order of the results that they produce does not have to be kept.
2371 Reduction phis are expected to be used by a reduction stmt; Other
2372 reduction stmts are expected to be unused in the loop. These are the
2373 expected values of "relevant" for reduction phis/stmts in the loop:
2376 vect_unused_in_loop ok
2377 vect_used_by_reduction ok
2378 vect_used_in_loop */
2380 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2384 case vect_unused_in_loop:
2385 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2387 case vect_used_by_reduction:
2388 if (TREE_CODE (stmt) == PHI_NODE)
2390 case vect_used_in_loop:
2392 if (vect_print_dump_info (REPORT_DETAILS))
2393 fprintf (vect_dump, "unsupported use of reduction.");
2394 VEC_free (tree, heap, worklist);
2397 relevant = vect_used_by_reduction;
2401 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2403 tree op = USE_FROM_PTR (use_p);
2404 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
2406 VEC_free (tree, heap, worklist);
2410 } /* while worklist */
2412 VEC_free (tree, heap, worklist);
2417 /* Function vect_can_advance_ivs_p
2419 In case the number of iterations that LOOP iterates is unknown at compile
2420 time, an epilog loop will be generated, and the loop induction variables
2421 (IVs) will be "advanced" to the value they are supposed to take just before
2422 the epilog loop. Here we check that the access function of the loop IVs
2423 and the expression that represents the loop bound are simple enough.
2424 These restrictions will be relaxed in the future. */
2427 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2429 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2430 basic_block bb = loop->header;
2433 /* Analyze phi functions of the loop header. */
2435 if (vect_print_dump_info (REPORT_DETAILS))
2436 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2438 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2440 tree access_fn = NULL;
2441 tree evolution_part;
2443 if (vect_print_dump_info (REPORT_DETAILS))
2445 fprintf (vect_dump, "Analyze phi: ");
2446 print_generic_expr (vect_dump, phi, TDF_SLIM);
2449 /* Skip virtual phi's. The data dependences that are associated with
2450 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2452 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "virtual phi. skip.");
2459 /* Skip reduction phis. */
2461 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2463 if (vect_print_dump_info (REPORT_DETAILS))
2464 fprintf (vect_dump, "reduc phi. skip.");
2468 /* Analyze the evolution function. */
2470 access_fn = instantiate_parameters
2471 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2475 if (vect_print_dump_info (REPORT_DETAILS))
2476 fprintf (vect_dump, "No Access function.");
2480 if (vect_print_dump_info (REPORT_DETAILS))
2482 fprintf (vect_dump, "Access function of PHI: ");
2483 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2486 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2488 if (evolution_part == NULL_TREE)
2490 if (vect_print_dump_info (REPORT_DETAILS))
2491 fprintf (vect_dump, "No evolution.");
2495 /* FORNOW: We do not transform initial conditions of IVs
2496 which evolution functions are a polynomial of degree >= 2. */
2498 if (tree_is_chrec (evolution_part))
2506 /* Function vect_get_loop_niters.
2508 Determine how many iterations the loop is executed.
2509 If an expression that represents the number of iterations
2510 can be constructed, place it in NUMBER_OF_ITERATIONS.
2511 Return the loop exit condition. */
2514 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2518 if (vect_print_dump_info (REPORT_DETAILS))
2519 fprintf (vect_dump, "=== get_loop_niters ===");
2521 niters = number_of_exit_cond_executions (loop);
2523 if (niters != NULL_TREE
2524 && niters != chrec_dont_know)
2526 *number_of_iterations = niters;
2528 if (vect_print_dump_info (REPORT_DETAILS))
2530 fprintf (vect_dump, "==> get_loop_niters:" );
2531 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2535 return get_loop_exit_condition (loop);
2539 /* Function vect_analyze_loop_form.
2541 Verify the following restrictions (some may be relaxed in the future):
2542 - it's an inner-most loop
2543 - number of BBs = 2 (which are the loop header and the latch)
2544 - the loop has a pre-header
2545 - the loop has a single entry and exit
2546 - the loop exit condition is simple enough, and the number of iterations
2547 can be analyzed (a countable loop). */
2549 static loop_vec_info
2550 vect_analyze_loop_form (struct loop *loop)
2552 loop_vec_info loop_vinfo;
2554 tree number_of_iterations = NULL;
2556 if (vect_print_dump_info (REPORT_DETAILS))
2557 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2561 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2562 fprintf (vect_dump, "not vectorized: nested loop.");
2566 if (!single_exit (loop)
2567 || loop->num_nodes != 2
2568 || EDGE_COUNT (loop->header->preds) != 2)
2570 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2572 if (!single_exit (loop))
2573 fprintf (vect_dump, "not vectorized: multiple exits.");
2574 else if (loop->num_nodes != 2)
2575 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2576 else if (EDGE_COUNT (loop->header->preds) != 2)
2577 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2583 /* We assume that the loop exit condition is at the end of the loop. i.e,
2584 that the loop is represented as a do-while (with a proper if-guard
2585 before the loop if needed), where the loop header contains all the
2586 executable statements, and the latch is empty. */
2587 if (!empty_block_p (loop->latch)
2588 || phi_nodes (loop->latch))
2590 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2591 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2595 /* Make sure there exists a single-predecessor exit bb: */
2596 if (!single_pred_p (single_exit (loop)->dest))
2598 edge e = single_exit (loop);
2599 if (!(e->flags & EDGE_ABNORMAL))
2601 split_loop_exit_edge (e);
2602 if (vect_print_dump_info (REPORT_DETAILS))
2603 fprintf (vect_dump, "split exit edge.");
2607 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2608 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2613 if (empty_block_p (loop->header))
2615 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2616 fprintf (vect_dump, "not vectorized: empty loop.");
2620 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2623 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2624 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2628 if (!number_of_iterations)
2630 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2632 "not vectorized: number of iterations cannot be computed.");
2636 if (chrec_contains_undetermined (number_of_iterations))
2638 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2639 fprintf (vect_dump, "Infinite number of iterations.");
2643 if (!NITERS_KNOWN_P (number_of_iterations))
2645 if (vect_print_dump_info (REPORT_DETAILS))
2647 fprintf (vect_dump, "Symbolic number of iterations is ");
2648 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2651 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
2653 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2654 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2658 loop_vinfo = new_loop_vec_info (loop);
2659 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2660 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2662 gcc_assert (!loop->aux);
2663 loop->aux = loop_vinfo;
2668 /* Function vect_analyze_loop.
2670 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2671 for it. The different analyses will record information in the
2672 loop_vec_info struct. */
2674 vect_analyze_loop (struct loop *loop)
2677 loop_vec_info loop_vinfo;
2679 if (vect_print_dump_info (REPORT_DETAILS))
2680 fprintf (vect_dump, "===== analyze_loop_nest =====");
2682 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2684 loop_vinfo = vect_analyze_loop_form (loop);
2687 if (vect_print_dump_info (REPORT_DETAILS))
2688 fprintf (vect_dump, "bad loop form.");
2692 /* Find all data references in the loop (which correspond to vdefs/vuses)
2693 and analyze their evolution in the loop.
2695 FORNOW: Handle only simple, array references, which
2696 alignment can be forced, and aligned pointer-references. */
2698 ok = vect_analyze_data_refs (loop_vinfo);
2701 if (vect_print_dump_info (REPORT_DETAILS))
2702 fprintf (vect_dump, "bad data references.");
2703 destroy_loop_vec_info (loop_vinfo);
2707 /* Classify all cross-iteration scalar data-flow cycles.
2708 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2710 vect_analyze_scalar_cycles (loop_vinfo);
2712 vect_pattern_recog (loop_vinfo);
2714 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2716 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2719 if (vect_print_dump_info (REPORT_DETAILS))
2720 fprintf (vect_dump, "unexpected pattern.");
2721 destroy_loop_vec_info (loop_vinfo);
2725 /* Analyze the alignment of the data-refs in the loop.
2726 Fail if a data reference is found that cannot be vectorized. */
2728 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2731 if (vect_print_dump_info (REPORT_DETAILS))
2732 fprintf (vect_dump, "bad data alignment.");
2733 destroy_loop_vec_info (loop_vinfo);
2737 ok = vect_determine_vectorization_factor (loop_vinfo);
2740 if (vect_print_dump_info (REPORT_DETAILS))
2741 fprintf (vect_dump, "can't determine vectorization factor.");
2742 destroy_loop_vec_info (loop_vinfo);
2746 /* Analyze data dependences between the data-refs in the loop.
2747 FORNOW: fail at the first data dependence that we encounter. */
2749 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2752 if (vect_print_dump_info (REPORT_DETAILS))
2753 fprintf (vect_dump, "bad data dependence.");
2754 destroy_loop_vec_info (loop_vinfo);
2758 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2759 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2761 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2764 if (vect_print_dump_info (REPORT_DETAILS))
2765 fprintf (vect_dump, "bad data access.");
2766 destroy_loop_vec_info (loop_vinfo);
2770 /* This pass will decide on using loop versioning and/or loop peeling in
2771 order to enhance the alignment of data references in the loop. */
2773 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2776 if (vect_print_dump_info (REPORT_DETAILS))
2777 fprintf (vect_dump, "bad data alignment.");
2778 destroy_loop_vec_info (loop_vinfo);
2782 /* Scan all the operations in the loop and make sure they are
2785 ok = vect_analyze_operations (loop_vinfo);
2788 if (vect_print_dump_info (REPORT_DETAILS))
2789 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2790 destroy_loop_vec_info (loop_vinfo);
2794 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;