1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
104 stmt_vec_info stmt_info;
107 if (vect_print_dump_info (REPORT_DETAILS))
108 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
110 for (i = 0; i < nbbs; i++)
112 basic_block bb = bbs[i];
114 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
116 stmt_info = vinfo_for_stmt (phi);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining phi: ");
120 print_generic_expr (vect_dump, phi, TDF_SLIM);
123 gcc_assert (stmt_info);
125 if (STMT_VINFO_RELEVANT_P (stmt_info))
127 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
128 scalar_type = TREE_TYPE (PHI_RESULT (phi));
130 if (vect_print_dump_info (REPORT_DETAILS))
132 fprintf (vect_dump, "get vectype for scalar type: ");
133 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
136 vectype = get_vectype_for_scalar_type (scalar_type);
139 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
142 "not vectorized: unsupported data-type ");
143 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
147 STMT_VINFO_VECTYPE (stmt_info) = vectype;
149 if (vect_print_dump_info (REPORT_DETAILS))
151 fprintf (vect_dump, "vectype: ");
152 print_generic_expr (vect_dump, vectype, TDF_SLIM);
155 nunits = TYPE_VECTOR_SUBPARTS (vectype);
156 if (vect_print_dump_info (REPORT_DETAILS))
157 fprintf (vect_dump, "nunits = %d", nunits);
159 if (!vectorization_factor
160 || (nunits > vectorization_factor))
161 vectorization_factor = nunits;
165 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
167 tree stmt = bsi_stmt (si);
168 stmt_info = vinfo_for_stmt (stmt);
170 if (vect_print_dump_info (REPORT_DETAILS))
172 fprintf (vect_dump, "==> examining statement: ");
173 print_generic_expr (vect_dump, stmt, TDF_SLIM);
176 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
179 gcc_assert (stmt_info);
181 /* skip stmts which do not need to be vectorized. */
182 if (!STMT_VINFO_RELEVANT_P (stmt_info)
183 && !STMT_VINFO_LIVE_P (stmt_info))
185 if (vect_print_dump_info (REPORT_DETAILS))
186 fprintf (vect_dump, "skip.");
190 if (!GIMPLE_STMT_P (stmt)
191 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
193 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
195 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
196 print_generic_expr (vect_dump, stmt, TDF_SLIM);
201 if (STMT_VINFO_VECTYPE (stmt_info))
203 /* The only case when a vectype had been already set is for stmts
204 that contain a dataref, or for "pattern-stmts" (stmts generated
205 by the vectorizer to represent/replace a certain idiom). */
206 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
207 || is_pattern_stmt_p (stmt_info));
208 vectype = STMT_VINFO_VECTYPE (stmt_info);
212 gcc_assert (! STMT_VINFO_DATA_REF (stmt_info)
213 && !is_pattern_stmt_p (stmt_info));
215 /* We set the vectype according to the type of the result (lhs).
216 For stmts whose result-type is different than the type of the
217 arguments (e.g. demotion, promotion), vectype will be reset
218 appropriately (later). Note that we have to visit the smallest
219 datatype in this function, because that determines the VF.
220 If the smallest datatype in the loop is present only as the
221 rhs of a promotion operation - we'd miss it here.
222 However, in such a case, that a variable of this datatype
223 does not appear in the lhs anywhere in the loop, it shouldn't
224 affect the vectorization factor. */
225 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
227 if (vect_print_dump_info (REPORT_DETAILS))
229 fprintf (vect_dump, "get vectype for scalar type: ");
230 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
233 vectype = get_vectype_for_scalar_type (scalar_type);
236 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
239 "not vectorized: unsupported data-type ");
240 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
244 STMT_VINFO_VECTYPE (stmt_info) = vectype;
247 if (vect_print_dump_info (REPORT_DETAILS))
249 fprintf (vect_dump, "vectype: ");
250 print_generic_expr (vect_dump, vectype, TDF_SLIM);
253 nunits = TYPE_VECTOR_SUBPARTS (vectype);
254 if (vect_print_dump_info (REPORT_DETAILS))
255 fprintf (vect_dump, "nunits = %d", nunits);
257 if (!vectorization_factor
258 || (nunits > vectorization_factor))
259 vectorization_factor = nunits;
264 /* TODO: Analyze cost. Decide if worth while to vectorize. */
265 if (vect_print_dump_info (REPORT_DETAILS))
266 fprintf (vect_dump, "vectorization factor = %d", vectorization_factor);
267 if (vectorization_factor <= 1)
269 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
270 fprintf (vect_dump, "not vectorized: unsupported data-type");
273 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
279 /* Function vect_analyze_operations.
281 Scan the loop stmts and make sure they are all vectorizable. */
284 vect_analyze_operations (loop_vec_info loop_vinfo)
286 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
287 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
288 int nbbs = loop->num_nodes;
289 block_stmt_iterator si;
290 unsigned int vectorization_factor = 0;
294 stmt_vec_info stmt_info;
295 bool need_to_vectorize = false;
297 if (vect_print_dump_info (REPORT_DETAILS))
298 fprintf (vect_dump, "=== vect_analyze_operations ===");
300 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
301 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
303 for (i = 0; i < nbbs; i++)
305 basic_block bb = bbs[i];
307 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
311 stmt_info = vinfo_for_stmt (phi);
312 if (vect_print_dump_info (REPORT_DETAILS))
314 fprintf (vect_dump, "examining phi: ");
315 print_generic_expr (vect_dump, phi, TDF_SLIM);
318 gcc_assert (stmt_info);
320 if (STMT_VINFO_LIVE_P (stmt_info))
322 /* FORNOW: not yet supported. */
323 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
324 fprintf (vect_dump, "not vectorized: value used after loop.");
328 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_loop
329 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
331 /* A scalar-dependence cycle that we don't support. */
332 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
333 fprintf (vect_dump, "not vectorized: scalar dependence cycle.");
337 if (STMT_VINFO_RELEVANT_P (stmt_info))
339 need_to_vectorize = true;
340 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
341 ok = vectorizable_induction (phi, NULL, NULL);
346 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
349 "not vectorized: relevant phi not supported: ");
350 print_generic_expr (vect_dump, phi, TDF_SLIM);
356 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
358 tree stmt = bsi_stmt (si);
359 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
360 enum vect_def_type relevance = STMT_VINFO_RELEVANT (stmt_info);
362 if (vect_print_dump_info (REPORT_DETAILS))
364 fprintf (vect_dump, "==> examining statement: ");
365 print_generic_expr (vect_dump, stmt, TDF_SLIM);
368 gcc_assert (stmt_info);
370 /* skip stmts which do not need to be vectorized.
371 this is expected to include:
372 - the COND_EXPR which is the loop exit condition
373 - any LABEL_EXPRs in the loop
374 - computations that are used only for array indexing or loop
377 if (!STMT_VINFO_RELEVANT_P (stmt_info)
378 && !STMT_VINFO_LIVE_P (stmt_info))
380 if (vect_print_dump_info (REPORT_DETAILS))
381 fprintf (vect_dump, "irrelevant.");
385 switch (STMT_VINFO_DEF_TYPE (stmt_info))
390 case vect_reduction_def:
391 gcc_assert (relevance == vect_unused_in_loop);
394 case vect_induction_def:
395 case vect_constant_def:
396 case vect_invariant_def:
397 case vect_unknown_def_type:
402 if (STMT_VINFO_RELEVANT_P (stmt_info))
404 gcc_assert (GIMPLE_STMT_P (stmt)
405 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
406 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
407 need_to_vectorize = true;
410 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
411 || vectorizable_type_demotion (stmt, NULL, NULL)
412 || vectorizable_conversion (stmt, NULL, NULL)
413 || vectorizable_operation (stmt, NULL, NULL)
414 || vectorizable_assignment (stmt, NULL, NULL)
415 || vectorizable_load (stmt, NULL, NULL)
416 || vectorizable_call (stmt, NULL, NULL)
417 || vectorizable_store (stmt, NULL, NULL)
418 || vectorizable_condition (stmt, NULL, NULL)
419 || vectorizable_reduction (stmt, NULL, NULL));
421 /* Stmts that are (also) "live" (i.e. - that are used out of the loop)
422 need extra handling, except for vectorizable reductions. */
423 if (STMT_VINFO_LIVE_P (stmt_info)
424 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
425 ok |= vectorizable_live_operation (stmt, NULL, NULL);
429 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
431 fprintf (vect_dump, "not vectorized: stmt not supported: ");
432 print_generic_expr (vect_dump, stmt, TDF_SLIM);
439 /* TODO: Analyze cost. Decide if worth while to vectorize. */
441 /* All operations in the loop are either irrelevant (deal with loop
442 control, or dead), or only used outside the loop and can be moved
443 out of the loop (e.g. invariants, inductions). The loop can be
444 optimized away by scalar optimizations. We're better off not
445 touching this loop. */
446 if (!need_to_vectorize)
448 if (vect_print_dump_info (REPORT_DETAILS))
450 "All the computation can be taken out of the loop.");
451 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
453 "not vectorized: redundant loop. no profit to vectorize.");
457 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
458 && vect_print_dump_info (REPORT_DETAILS))
460 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
461 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
463 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
464 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
465 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
466 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
467 * vectorization_factor))))
469 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
470 fprintf (vect_dump, "not vectorized: iteration count too small.");
474 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
475 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
476 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
478 if (vect_print_dump_info (REPORT_DETAILS))
479 fprintf (vect_dump, "epilog loop required.");
480 if (!vect_can_advance_ivs_p (loop_vinfo))
482 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
484 "not vectorized: can't create epilog loop 1.");
487 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
489 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
491 "not vectorized: can't create epilog loop 2.");
500 /* Function exist_non_indexing_operands_for_use_p
502 USE is one of the uses attached to STMT. Check if USE is
503 used in STMT for anything other than indexing an array. */
506 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
509 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
511 /* USE corresponds to some operand in STMT. If there is no data
512 reference in STMT, then any operand that corresponds to USE
513 is not indexing an array. */
514 if (!STMT_VINFO_DATA_REF (stmt_info))
517 /* STMT has a data_ref. FORNOW this means that its of one of
521 (This should have been verified in analyze_data_refs).
523 'var' in the second case corresponds to a def, not a use,
524 so USE cannot correspond to any operands that are not used
527 Therefore, all we need to check is if STMT falls into the
528 first case, and whether var corresponds to USE. */
530 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
533 operand = GIMPLE_STMT_OPERAND (stmt, 1);
535 if (TREE_CODE (operand) != SSA_NAME)
545 /* Function vect_analyze_scalar_cycles.
547 Examine the cross iteration def-use cycles of scalar variables, by
548 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
549 following: invariant, induction, reduction, unknown.
551 Some forms of scalar cycles are not yet supported.
553 Example1: reduction: (unsupported yet)
559 Example2: induction: (unsupported yet)
565 Note: the following loop *is* vectorizable:
571 even though it has a def-use cycle caused by the induction variable i:
573 loop: i_2 = PHI (i_0, i_1)
578 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
579 it does not need to be vectorized because it is only used for array
580 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
581 loop2 on the other hand is relevant (it is being written to memory).
585 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
588 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
589 basic_block bb = loop->header;
591 VEC(tree,heap) *worklist = VEC_alloc (tree, heap, 64);
593 if (vect_print_dump_info (REPORT_DETAILS))
594 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
596 /* First - identify all inductions. */
597 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
599 tree access_fn = NULL;
600 tree def = PHI_RESULT (phi);
601 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
603 if (vect_print_dump_info (REPORT_DETAILS))
605 fprintf (vect_dump, "Analyze phi: ");
606 print_generic_expr (vect_dump, phi, TDF_SLIM);
609 /* Skip virtual phi's. The data dependences that are associated with
610 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
611 if (!is_gimple_reg (SSA_NAME_VAR (def)))
614 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
616 /* Analyze the evolution function. */
617 access_fn = analyze_scalar_evolution (loop, def);
618 if (access_fn && vect_print_dump_info (REPORT_DETAILS))
620 fprintf (vect_dump, "Access function of PHI: ");
621 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
625 || !vect_is_simple_iv_evolution (loop->num, access_fn, &dumy, &dumy))
627 VEC_safe_push (tree, heap, worklist, phi);
631 if (vect_print_dump_info (REPORT_DETAILS))
632 fprintf (vect_dump, "Detected induction.");
633 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
637 /* Second - identify all reductions. */
638 while (VEC_length (tree, worklist) > 0)
640 tree phi = VEC_pop (tree, worklist);
641 tree def = PHI_RESULT (phi);
642 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
645 if (vect_print_dump_info (REPORT_DETAILS))
647 fprintf (vect_dump, "Analyze phi: ");
648 print_generic_expr (vect_dump, phi, TDF_SLIM);
651 gcc_assert (is_gimple_reg (SSA_NAME_VAR (def)));
652 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
654 reduc_stmt = vect_is_simple_reduction (loop, phi);
657 if (vect_print_dump_info (REPORT_DETAILS))
658 fprintf (vect_dump, "Detected reduction.");
659 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
660 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
664 if (vect_print_dump_info (REPORT_DETAILS))
665 fprintf (vect_dump, "Unknown def-use cycle pattern.");
668 VEC_free (tree, heap, worklist);
673 /* Function vect_insert_into_interleaving_chain.
675 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
678 vect_insert_into_interleaving_chain (struct data_reference *dra,
679 struct data_reference *drb)
681 tree prev, next, next_init;
682 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
683 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
685 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
686 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
689 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
690 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
693 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
694 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
698 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
701 /* We got to the end of the list. Insert here. */
702 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
703 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
707 /* Function vect_update_interleaving_chain.
709 For two data-refs DRA and DRB that are a part of a chain interleaved data
710 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
712 There are four possible cases:
713 1. New stmts - both DRA and DRB are not a part of any chain:
716 2. DRB is a part of a chain and DRA is not:
717 no need to update FIRST_DR
718 no need to insert DRB
719 insert DRA according to init
720 3. DRA is a part of a chain and DRB is not:
721 if (init of FIRST_DR > init of DRB)
723 NEXT(FIRST_DR) = previous FIRST_DR
725 insert DRB according to its init
726 4. both DRA and DRB are in some interleaving chains:
727 choose the chain with the smallest init of FIRST_DR
728 insert the nodes of the second chain into the first one. */
731 vect_update_interleaving_chain (struct data_reference *drb,
732 struct data_reference *dra)
734 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
735 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
736 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
737 tree node, prev, next, node_init, first_stmt;
739 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
740 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
742 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
743 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
744 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
748 /* 2. DRB is a part of a chain and DRA is not. */
749 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
751 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
752 /* Insert DRA into the chain of DRB. */
753 vect_insert_into_interleaving_chain (dra, drb);
757 /* 3. DRA is a part of a chain and DRB is not. */
758 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
760 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
761 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
765 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
767 /* DRB's init is smaller than the init of the stmt previously marked
768 as the first stmt of the interleaving chain of DRA. Therefore, we
769 update FIRST_STMT and put DRB in the head of the list. */
770 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
771 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
773 /* Update all the stmts in the list to point to the new FIRST_STMT. */
774 tmp = old_first_stmt;
777 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
778 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
783 /* Insert DRB in the list of DRA. */
784 vect_insert_into_interleaving_chain (drb, dra);
785 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
790 /* 4. both DRA and DRB are in some interleaving chains. */
791 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
792 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
793 if (first_a == first_b)
795 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
796 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
798 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
800 /* Insert the nodes of DRA chain into the DRB chain.
801 After inserting a node, continue from this node of the DRB chain (don't
802 start from the beginning. */
803 node = DR_GROUP_FIRST_DR (stmtinfo_a);
804 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
805 first_stmt = first_b;
809 /* Insert the nodes of DRB chain into the DRA chain.
810 After inserting a node, continue from this node of the DRA chain (don't
811 start from the beginning. */
812 node = DR_GROUP_FIRST_DR (stmtinfo_b);
813 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
814 first_stmt = first_a;
819 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
820 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
823 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
824 if (tree_int_cst_compare (next_init, node_init) > 0)
827 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
828 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
833 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
837 /* We got to the end of the list. Insert here. */
838 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
839 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
842 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
843 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
848 /* Function vect_equal_offsets.
850 Check if OFFSET1 and OFFSET2 are identical expressions. */
853 vect_equal_offsets (tree offset1, tree offset2)
857 STRIP_NOPS (offset1);
858 STRIP_NOPS (offset2);
860 if (offset1 == offset2)
863 if (TREE_CODE (offset1) != TREE_CODE (offset2)
864 || !BINARY_CLASS_P (offset1)
865 || !BINARY_CLASS_P (offset2))
868 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
869 TREE_OPERAND (offset2, 0));
870 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
871 TREE_OPERAND (offset2, 1));
873 return (res0 && res1);
877 /* Function vect_check_interleaving.
879 Check if DRA and DRB are a part of interleaving. In case they are, insert
880 DRA and DRB in an interleaving chain. */
883 vect_check_interleaving (struct data_reference *dra,
884 struct data_reference *drb)
886 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
888 /* Check that the data-refs have same first location (except init) and they
889 are both either store or load (not load and store). */
890 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
891 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
892 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
893 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
894 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
895 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
896 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
897 || DR_IS_READ (dra) != DR_IS_READ (drb))
901 1. data-refs are of the same type
902 2. their steps are equal
903 3. the step is greater than the difference between data-refs' inits */
904 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
905 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
907 if (type_size_a != type_size_b
908 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
911 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
912 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
913 step = TREE_INT_CST_LOW (DR_STEP (dra));
917 /* If init_a == init_b + the size of the type * k, we have an interleaving,
918 and DRB is accessed before DRA. */
919 diff_mod_size = (init_a - init_b) % type_size_a;
921 if ((init_a - init_b) > step)
924 if (diff_mod_size == 0)
926 vect_update_interleaving_chain (drb, dra);
927 if (vect_print_dump_info (REPORT_DR_DETAILS))
929 fprintf (vect_dump, "Detected interleaving ");
930 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
931 fprintf (vect_dump, " and ");
932 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
939 /* If init_b == init_a + the size of the type * k, we have an
940 interleaving, and DRA is accessed before DRB. */
941 diff_mod_size = (init_b - init_a) % type_size_a;
943 if ((init_b - init_a) > step)
946 if (diff_mod_size == 0)
948 vect_update_interleaving_chain (dra, drb);
949 if (vect_print_dump_info (REPORT_DR_DETAILS))
951 fprintf (vect_dump, "Detected interleaving ");
952 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
953 fprintf (vect_dump, " and ");
954 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
962 /* Function vect_analyze_data_ref_dependence.
964 Return TRUE if there (might) exist a dependence between a memory-reference
965 DRA and a memory-reference DRB. */
968 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
969 loop_vec_info loop_vinfo)
972 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
973 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
974 struct data_reference *dra = DDR_A (ddr);
975 struct data_reference *drb = DDR_B (ddr);
976 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
977 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
978 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
979 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
980 lambda_vector dist_v;
981 unsigned int loop_depth;
983 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
985 /* Independent data accesses. */
986 vect_check_interleaving (dra, drb);
990 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
993 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
995 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
998 "not vectorized: can't determine dependence between ");
999 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1000 fprintf (vect_dump, " and ");
1001 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1006 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1008 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1010 fprintf (vect_dump, "not vectorized: bad dist vector for ");
1011 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1012 fprintf (vect_dump, " and ");
1013 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1018 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1019 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1021 int dist = dist_v[loop_depth];
1023 if (vect_print_dump_info (REPORT_DR_DETAILS))
1024 fprintf (vect_dump, "dependence distance = %d.", dist);
1026 /* Same loop iteration. */
1027 if (dist % vectorization_factor == 0 && dra_size == drb_size)
1029 /* Two references with distance zero have the same alignment. */
1030 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1031 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1032 if (vect_print_dump_info (REPORT_ALIGNMENT))
1033 fprintf (vect_dump, "accesses have the same alignment.");
1034 if (vect_print_dump_info (REPORT_DR_DETAILS))
1036 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1037 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1038 fprintf (vect_dump, " and ");
1039 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1042 /* For interleaving, mark that there is a read-write dependency if
1043 necessary. We check before that one of the data-refs is store. */
1044 if (DR_IS_READ (dra))
1045 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
1048 if (DR_IS_READ (drb))
1049 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
1055 if (abs (dist) >= vectorization_factor)
1057 /* Dependence distance does not create dependence, as far as vectorization
1058 is concerned, in this case. */
1059 if (vect_print_dump_info (REPORT_DR_DETAILS))
1060 fprintf (vect_dump, "dependence distance >= VF.");
1064 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1067 "not vectorized: possible dependence between data-refs ");
1068 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1069 fprintf (vect_dump, " and ");
1070 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1080 /* Function vect_analyze_data_ref_dependences.
1082 Examine all the data references in the loop, and make sure there do not
1083 exist any data dependences between them. */
1086 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1089 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1090 struct data_dependence_relation *ddr;
1092 if (vect_print_dump_info (REPORT_DETAILS))
1093 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1095 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1096 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
1103 /* Function vect_compute_data_ref_alignment
1105 Compute the misalignment of the data reference DR.
1108 1. If during the misalignment computation it is found that the data reference
1109 cannot be vectorized then false is returned.
1110 2. DR_MISALIGNMENT (DR) is defined.
1112 FOR NOW: No analysis is actually performed. Misalignment is calculated
1113 only for trivial cases. TODO. */
1116 vect_compute_data_ref_alignment (struct data_reference *dr)
1118 tree stmt = DR_STMT (dr);
1119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1120 tree ref = DR_REF (dr);
1122 tree base, base_addr;
1125 tree aligned_to, alignment;
1127 if (vect_print_dump_info (REPORT_DETAILS))
1128 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1130 /* Initialize misalignment to unknown. */
1131 DR_MISALIGNMENT (dr) = -1;
1133 misalign = DR_INIT (dr);
1134 aligned_to = DR_ALIGNED_TO (dr);
1135 base_addr = DR_BASE_ADDRESS (dr);
1136 base = build_fold_indirect_ref (base_addr);
1137 vectype = STMT_VINFO_VECTYPE (stmt_info);
1138 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1140 if (tree_int_cst_compare (aligned_to, alignment) < 0)
1142 if (vect_print_dump_info (REPORT_DETAILS))
1144 fprintf (vect_dump, "Unknown alignment for access: ");
1145 print_generic_expr (vect_dump, base, TDF_SLIM);
1151 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1153 || (TREE_CODE (base_addr) == SSA_NAME
1154 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1155 TREE_TYPE (base_addr)))),
1157 base_aligned = true;
1159 base_aligned = false;
1163 /* Do not change the alignment of global variables if
1164 flag_section_anchors is enabled. */
1165 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1166 || (TREE_STATIC (base) && flag_section_anchors))
1168 if (vect_print_dump_info (REPORT_DETAILS))
1170 fprintf (vect_dump, "can't force alignment of ref: ");
1171 print_generic_expr (vect_dump, ref, TDF_SLIM);
1176 /* Force the alignment of the decl.
1177 NOTE: This is the only change to the code we make during
1178 the analysis phase, before deciding to vectorize the loop. */
1179 if (vect_print_dump_info (REPORT_DETAILS))
1180 fprintf (vect_dump, "force alignment");
1181 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1182 DECL_USER_ALIGN (base) = 1;
1185 /* At this point we assume that the base is aligned. */
1186 gcc_assert (base_aligned
1187 || (TREE_CODE (base) == VAR_DECL
1188 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1190 /* Modulo alignment. */
1191 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1193 if (!host_integerp (misalign, 1))
1195 /* Negative or overflowed misalignment value. */
1196 if (vect_print_dump_info (REPORT_DETAILS))
1197 fprintf (vect_dump, "unexpected misalign value");
1201 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1203 if (vect_print_dump_info (REPORT_DETAILS))
1205 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1206 print_generic_expr (vect_dump, ref, TDF_SLIM);
1213 /* Function vect_compute_data_refs_alignment
1215 Compute the misalignment of data references in the loop.
1216 Return FALSE if a data reference is found that cannot be vectorized. */
1219 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1221 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1222 struct data_reference *dr;
1225 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1226 if (!vect_compute_data_ref_alignment (dr))
1233 /* Function vect_update_misalignment_for_peel
1235 DR - the data reference whose misalignment is to be adjusted.
1236 DR_PEEL - the data reference whose misalignment is being made
1237 zero in the vector loop by the peel.
1238 NPEEL - the number of iterations in the peel loop if the misalignment
1239 of DR_PEEL is known at compile time. */
1242 vect_update_misalignment_for_peel (struct data_reference *dr,
1243 struct data_reference *dr_peel, int npeel)
1246 VEC(dr_p,heap) *same_align_drs;
1247 struct data_reference *current_dr;
1248 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1249 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1250 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1251 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1253 /* For interleaved data accesses the step in the loop must be multiplied by
1254 the size of the interleaving group. */
1255 if (DR_GROUP_FIRST_DR (stmt_info))
1256 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1257 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1258 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1260 /* It can be assumed that the data refs with the same alignment as dr_peel
1261 are aligned in the vector loop. */
1263 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1264 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1266 if (current_dr != dr)
1268 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1269 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1270 DR_MISALIGNMENT (dr) = 0;
1274 if (known_alignment_for_access_p (dr)
1275 && known_alignment_for_access_p (dr_peel))
1277 DR_MISALIGNMENT (dr) += npeel * dr_size;
1278 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1282 if (vect_print_dump_info (REPORT_DETAILS))
1283 fprintf (vect_dump, "Setting misalignment to -1.");
1284 DR_MISALIGNMENT (dr) = -1;
1288 /* Function vect_verify_datarefs_alignment
1290 Return TRUE if all data references in the loop can be
1291 handled with respect to alignment. */
1294 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1296 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1297 struct data_reference *dr;
1298 enum dr_alignment_support supportable_dr_alignment;
1301 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1303 tree stmt = DR_STMT (dr);
1304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1306 /* For interleaving, only the alignment of the first access matters. */
1307 if (DR_GROUP_FIRST_DR (stmt_info)
1308 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1311 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1312 if (!supportable_dr_alignment)
1314 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1316 if (DR_IS_READ (dr))
1318 "not vectorized: unsupported unaligned load.");
1321 "not vectorized: unsupported unaligned store.");
1325 if (supportable_dr_alignment != dr_aligned
1326 && vect_print_dump_info (REPORT_ALIGNMENT))
1327 fprintf (vect_dump, "Vectorizing an unaligned access.");
1333 /* Function vect_enhance_data_refs_alignment
1335 This pass will use loop versioning and loop peeling in order to enhance
1336 the alignment of data references in the loop.
1338 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1339 original loop is to be vectorized; Any other loops that are created by
1340 the transformations performed in this pass - are not supposed to be
1341 vectorized. This restriction will be relaxed.
1343 This pass will require a cost model to guide it whether to apply peeling
1344 or versioning or a combination of the two. For example, the scheme that
1345 intel uses when given a loop with several memory accesses, is as follows:
1346 choose one memory access ('p') which alignment you want to force by doing
1347 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1348 other accesses are not necessarily aligned, or (2) use loop versioning to
1349 generate one loop in which all accesses are aligned, and another loop in
1350 which only 'p' is necessarily aligned.
1352 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1353 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1354 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1356 Devising a cost model is the most critical aspect of this work. It will
1357 guide us on which access to peel for, whether to use loop versioning, how
1358 many versions to create, etc. The cost model will probably consist of
1359 generic considerations as well as target specific considerations (on
1360 powerpc for example, misaligned stores are more painful than misaligned
1363 Here are the general steps involved in alignment enhancements:
1365 -- original loop, before alignment analysis:
1366 for (i=0; i<N; i++){
1367 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1368 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1371 -- After vect_compute_data_refs_alignment:
1372 for (i=0; i<N; i++){
1373 x = q[i]; # DR_MISALIGNMENT(q) = 3
1374 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1377 -- Possibility 1: we do loop versioning:
1379 for (i=0; i<N; i++){ # loop 1A
1380 x = q[i]; # DR_MISALIGNMENT(q) = 3
1381 p[i] = y; # DR_MISALIGNMENT(p) = 0
1385 for (i=0; i<N; i++){ # loop 1B
1386 x = q[i]; # DR_MISALIGNMENT(q) = 3
1387 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1391 -- Possibility 2: we do loop peeling:
1392 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1396 for (i = 3; i < N; i++){ # loop 2A
1397 x = q[i]; # DR_MISALIGNMENT(q) = 0
1398 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1401 -- Possibility 3: combination of loop peeling and versioning:
1402 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1407 for (i = 3; i<N; i++){ # loop 3A
1408 x = q[i]; # DR_MISALIGNMENT(q) = 0
1409 p[i] = y; # DR_MISALIGNMENT(p) = 0
1413 for (i = 3; i<N; i++){ # loop 3B
1414 x = q[i]; # DR_MISALIGNMENT(q) = 0
1415 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1419 These loops are later passed to loop_transform to be vectorized. The
1420 vectorizer will use the alignment information to guide the transformation
1421 (whether to generate regular loads/stores, or with special handling for
1425 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1427 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1428 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1429 enum dr_alignment_support supportable_dr_alignment;
1430 struct data_reference *dr0 = NULL;
1431 struct data_reference *dr;
1433 bool do_peeling = false;
1434 bool do_versioning = false;
1437 stmt_vec_info stmt_info;
1439 if (vect_print_dump_info (REPORT_DETAILS))
1440 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1442 /* While cost model enhancements are expected in the future, the high level
1443 view of the code at this time is as follows:
1445 A) If there is a misaligned write then see if peeling to align this write
1446 can make all data references satisfy vect_supportable_dr_alignment.
1447 If so, update data structures as needed and return true. Note that
1448 at this time vect_supportable_dr_alignment is known to return false
1449 for a misaligned write.
1451 B) If peeling wasn't possible and there is a data reference with an
1452 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1453 then see if loop versioning checks can be used to make all data
1454 references satisfy vect_supportable_dr_alignment. If so, update
1455 data structures as needed and return true.
1457 C) If neither peeling nor versioning were successful then return false if
1458 any data reference does not satisfy vect_supportable_dr_alignment.
1460 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1462 Note, Possibility 3 above (which is peeling and versioning together) is not
1463 being done at this time. */
1465 /* (1) Peeling to force alignment. */
1467 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1469 + How many accesses will become aligned due to the peeling
1470 - How many accesses will become unaligned due to the peeling,
1471 and the cost of misaligned accesses.
1472 - The cost of peeling (the extra runtime checks, the increase
1475 The scheme we use FORNOW: peel to force the alignment of the first
1476 misaligned store in the loop.
1477 Rationale: misaligned stores are not yet supported.
1479 TODO: Use a cost model. */
1481 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1483 stmt = DR_STMT (dr);
1484 stmt_info = vinfo_for_stmt (stmt);
1486 /* For interleaving, only the alignment of the first access
1488 if (DR_GROUP_FIRST_DR (stmt_info)
1489 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1492 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1494 if (DR_GROUP_FIRST_DR (stmt_info))
1496 /* For interleaved access we peel only if number of iterations in
1497 the prolog loop ({VF - misalignment}), is a multiple of the
1498 number of the interleaved accesses. */
1499 int elem_size, mis_in_elements;
1500 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1501 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1503 /* FORNOW: handle only known alignment. */
1504 if (!known_alignment_for_access_p (dr))
1510 elem_size = UNITS_PER_SIMD_WORD / nelements;
1511 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1513 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1525 /* Often peeling for alignment will require peeling for loop-bound, which in
1526 turn requires that we know how to adjust the loop ivs after the loop. */
1527 if (!vect_can_advance_ivs_p (loop_vinfo)
1528 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1535 tree stmt = DR_STMT (dr0);
1536 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1537 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1538 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1540 if (known_alignment_for_access_p (dr0))
1542 /* Since it's known at compile time, compute the number of iterations
1543 in the peeled loop (the peeling factor) for use in updating
1544 DR_MISALIGNMENT values. The peeling factor is the vectorization
1545 factor minus the misalignment as an element count. */
1546 mis = DR_MISALIGNMENT (dr0);
1547 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1548 npeel = nelements - mis;
1550 /* For interleaved data access every iteration accesses all the
1551 members of the group, therefore we divide the number of iterations
1552 by the group size. */
1553 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1554 if (DR_GROUP_FIRST_DR (stmt_info))
1555 npeel /= DR_GROUP_SIZE (stmt_info);
1557 if (vect_print_dump_info (REPORT_DETAILS))
1558 fprintf (vect_dump, "Try peeling by %d", npeel);
1561 /* Ensure that all data refs can be vectorized after the peel. */
1562 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1564 int save_misalignment;
1569 stmt = DR_STMT (dr);
1570 stmt_info = vinfo_for_stmt (stmt);
1571 /* For interleaving, only the alignment of the first access
1573 if (DR_GROUP_FIRST_DR (stmt_info)
1574 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1577 save_misalignment = DR_MISALIGNMENT (dr);
1578 vect_update_misalignment_for_peel (dr, dr0, npeel);
1579 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1580 DR_MISALIGNMENT (dr) = save_misalignment;
1582 if (!supportable_dr_alignment)
1591 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1592 If the misalignment of DR_i is identical to that of dr0 then set
1593 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1594 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1595 by the peeling factor times the element size of DR_i (MOD the
1596 vectorization factor times the size). Otherwise, the
1597 misalignment of DR_i must be set to unknown. */
1598 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1600 vect_update_misalignment_for_peel (dr, dr0, npeel);
1602 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1603 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1604 DR_MISALIGNMENT (dr0) = 0;
1605 if (vect_print_dump_info (REPORT_ALIGNMENT))
1606 fprintf (vect_dump, "Alignment of access forced using peeling.");
1608 if (vect_print_dump_info (REPORT_DETAILS))
1609 fprintf (vect_dump, "Peeling for alignment will be applied.");
1611 stat = vect_verify_datarefs_alignment (loop_vinfo);
1618 /* (2) Versioning to force alignment. */
1620 /* Try versioning if:
1621 1) flag_tree_vect_loop_version is TRUE
1622 2) optimize_size is FALSE
1623 3) there is at least one unsupported misaligned data ref with an unknown
1625 4) all misaligned data refs with a known misalignment are supported, and
1626 5) the number of runtime alignment checks is within reason. */
1628 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1632 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1634 stmt = DR_STMT (dr);
1635 stmt_info = vinfo_for_stmt (stmt);
1637 /* For interleaving, only the alignment of the first access
1639 if (aligned_access_p (dr)
1640 || (DR_GROUP_FIRST_DR (stmt_info)
1641 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1644 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1646 if (!supportable_dr_alignment)
1652 if (known_alignment_for_access_p (dr)
1653 || VEC_length (tree,
1654 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1655 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1657 do_versioning = false;
1661 stmt = DR_STMT (dr);
1662 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1663 gcc_assert (vectype);
1665 /* The rightmost bits of an aligned address must be zeros.
1666 Construct the mask needed for this test. For example,
1667 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1668 mask must be 15 = 0xf. */
1669 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1671 /* FORNOW: use the same mask to test all potentially unaligned
1672 references in the loop. The vectorizer currently supports
1673 a single vector size, see the reference to
1674 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1675 vectorization factor is computed. */
1676 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1677 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1678 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1679 VEC_safe_push (tree, heap,
1680 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1685 /* Versioning requires at least one misaligned data reference. */
1686 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1687 do_versioning = false;
1688 else if (!do_versioning)
1689 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1694 VEC(tree,heap) *may_misalign_stmts
1695 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1698 /* It can now be assumed that the data references in the statements
1699 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1700 of the loop being vectorized. */
1701 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1703 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1704 dr = STMT_VINFO_DATA_REF (stmt_info);
1705 DR_MISALIGNMENT (dr) = 0;
1706 if (vect_print_dump_info (REPORT_ALIGNMENT))
1707 fprintf (vect_dump, "Alignment of access forced using versioning.");
1710 if (vect_print_dump_info (REPORT_DETAILS))
1711 fprintf (vect_dump, "Versioning for alignment will be applied.");
1713 /* Peeling and versioning can't be done together at this time. */
1714 gcc_assert (! (do_peeling && do_versioning));
1716 stat = vect_verify_datarefs_alignment (loop_vinfo);
1721 /* This point is reached if neither peeling nor versioning is being done. */
1722 gcc_assert (! (do_peeling || do_versioning));
1724 stat = vect_verify_datarefs_alignment (loop_vinfo);
1729 /* Function vect_analyze_data_refs_alignment
1731 Analyze the alignment of the data-references in the loop.
1732 Return FALSE if a data reference is found that cannot be vectorized. */
1735 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1737 if (vect_print_dump_info (REPORT_DETAILS))
1738 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1740 if (!vect_compute_data_refs_alignment (loop_vinfo))
1742 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1744 "not vectorized: can't calculate alignment for data ref.");
1752 /* Function vect_analyze_data_ref_access.
1754 Analyze the access pattern of the data-reference DR. For now, a data access
1755 has to be consecutive to be considered vectorizable. */
1758 vect_analyze_data_ref_access (struct data_reference *dr)
1760 tree step = DR_STEP (dr);
1761 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1762 tree scalar_type = TREE_TYPE (DR_REF (dr));
1763 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1764 tree stmt = DR_STMT (dr);
1765 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1766 interleaving group (including gaps). */
1767 HOST_WIDE_INT stride = dr_step / type_size;
1771 if (vect_print_dump_info (REPORT_DETAILS))
1772 fprintf (vect_dump, "bad data-ref access");
1777 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1779 /* Mark that it is not interleaving. */
1780 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1784 /* Not consecutive access is possible only if it is a part of interleaving. */
1785 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1787 /* Check if it this DR is a part of interleaving, and is a single
1788 element of the group that is accessed in the loop. */
1790 /* Gaps are supported only for loads. STEP must be a multiple of the type
1791 size. The size of the group must be a power of 2. */
1793 && (dr_step % type_size) == 0
1795 && exact_log2 (stride) != -1)
1797 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1798 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1799 if (vect_print_dump_info (REPORT_DR_DETAILS))
1801 fprintf (vect_dump, "Detected single element interleaving %d ",
1802 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1803 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1804 fprintf (vect_dump, " step ");
1805 print_generic_expr (vect_dump, step, TDF_SLIM);
1809 if (vect_print_dump_info (REPORT_DETAILS))
1810 fprintf (vect_dump, "not consecutive access");
1814 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1816 /* First stmt in the interleaving chain. Check the chain. */
1817 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1818 struct data_reference *data_ref = dr;
1819 unsigned int count = 1;
1821 tree prev_init = DR_INIT (data_ref);
1823 HOST_WIDE_INT diff, count_in_bytes;
1827 /* Skip same data-refs. In case that two or more stmts share data-ref
1828 (supported only for loads), we vectorize only the first stmt, and
1829 the rest get their vectorized loads from the first one. */
1830 if (!tree_int_cst_compare (DR_INIT (data_ref),
1831 DR_INIT (STMT_VINFO_DATA_REF (
1832 vinfo_for_stmt (next)))))
1834 if (!DR_IS_READ (data_ref))
1836 if (vect_print_dump_info (REPORT_DETAILS))
1837 fprintf (vect_dump, "Two store stmts share the same dr.");
1841 /* Check that there is no load-store dependencies for this loads
1842 to prevent a case of load-store-load to the same location. */
1843 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1844 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1846 if (vect_print_dump_info (REPORT_DETAILS))
1848 "READ_WRITE dependence in interleaving.");
1852 /* For load use the same data-ref load. */
1853 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1856 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1861 /* Check that all the accesses have the same STEP. */
1862 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1863 if (tree_int_cst_compare (step, next_step))
1865 if (vect_print_dump_info (REPORT_DETAILS))
1866 fprintf (vect_dump, "not consecutive access in interleaving");
1870 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1871 /* Check that the distance between two accesses is equal to the type
1872 size. Otherwise, we have gaps. */
1873 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1874 - TREE_INT_CST_LOW (prev_init)) / type_size;
1875 if (!DR_IS_READ (data_ref) && diff != 1)
1877 if (vect_print_dump_info (REPORT_DETAILS))
1878 fprintf (vect_dump, "interleaved store with gaps");
1881 /* Store the gap from the previous member of the group. If there is no
1882 gap in the access, DR_GROUP_GAP is always 1. */
1883 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1885 prev_init = DR_INIT (data_ref);
1886 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1887 /* Count the number of data-refs in the chain. */
1891 /* COUNT is the number of accesses found, we multiply it by the size of
1892 the type to get COUNT_IN_BYTES. */
1893 count_in_bytes = type_size * count;
1895 /* Check that the size of the interleaving is not greater than STEP. */
1896 if (dr_step < count_in_bytes)
1898 if (vect_print_dump_info (REPORT_DETAILS))
1900 fprintf (vect_dump, "interleaving size is greater than step for ");
1901 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1906 /* Check that the size of the interleaving is equal to STEP for stores,
1907 i.e., that there are no gaps. */
1908 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1910 if (vect_print_dump_info (REPORT_DETAILS))
1911 fprintf (vect_dump, "interleaved store with gaps");
1915 /* Check that STEP is a multiple of type size. */
1916 if ((dr_step % type_size) != 0)
1918 if (vect_print_dump_info (REPORT_DETAILS))
1920 fprintf (vect_dump, "step is not a multiple of type size: step ");
1921 print_generic_expr (vect_dump, step, TDF_SLIM);
1922 fprintf (vect_dump, " size ");
1923 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1929 /* FORNOW: we handle only interleaving that is a power of 2. */
1930 if (exact_log2 (stride) == -1)
1932 if (vect_print_dump_info (REPORT_DETAILS))
1933 fprintf (vect_dump, "interleaving is not a power of 2");
1936 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1942 /* Function vect_analyze_data_ref_accesses.
1944 Analyze the access pattern of all the data references in the loop.
1946 FORNOW: the only access pattern that is considered vectorizable is a
1947 simple step 1 (consecutive) access.
1949 FORNOW: handle only arrays and pointer accesses. */
1952 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1955 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1956 struct data_reference *dr;
1958 if (vect_print_dump_info (REPORT_DETAILS))
1959 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1961 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1962 if (!vect_analyze_data_ref_access (dr))
1964 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1965 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1973 /* Function vect_analyze_data_refs.
1975 Find all the data references in the loop.
1977 The general structure of the analysis of data refs in the vectorizer is as
1979 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1980 find and analyze all data-refs in the loop and their dependences.
1981 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1982 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1983 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1988 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1990 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1992 VEC (data_reference_p, heap) *datarefs;
1993 struct data_reference *dr;
1996 if (vect_print_dump_info (REPORT_DETAILS))
1997 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1999 compute_data_dependences_for_loop (loop, true,
2000 &LOOP_VINFO_DATAREFS (loop_vinfo),
2001 &LOOP_VINFO_DDRS (loop_vinfo));
2003 /* Go through the data-refs, check that the analysis succeeded. Update pointer
2004 from stmt_vec_info struct to DR and vectype. */
2005 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2007 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2010 stmt_vec_info stmt_info;
2012 if (!dr || !DR_REF (dr))
2014 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2015 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2019 /* Update DR field in stmt_vec_info struct. */
2020 stmt = DR_STMT (dr);
2021 stmt_info = vinfo_for_stmt (stmt);
2023 if (STMT_VINFO_DATA_REF (stmt_info))
2025 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2028 "not vectorized: more than one data ref in stmt: ");
2029 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2033 STMT_VINFO_DATA_REF (stmt_info) = dr;
2035 /* Check that analysis of the data-ref succeeded. */
2036 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2039 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2041 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2042 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2046 if (!DR_SYMBOL_TAG (dr))
2048 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2050 fprintf (vect_dump, "not vectorized: no memory tag for ");
2051 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2056 /* Set vectype for STMT. */
2057 scalar_type = TREE_TYPE (DR_REF (dr));
2058 STMT_VINFO_VECTYPE (stmt_info) =
2059 get_vectype_for_scalar_type (scalar_type);
2060 if (!STMT_VINFO_VECTYPE (stmt_info))
2062 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2065 "not vectorized: no vectype for stmt: ");
2066 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2067 fprintf (vect_dump, " scalar_type: ");
2068 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2078 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2080 /* Function vect_mark_relevant.
2082 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2085 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2086 enum vect_relevant relevant, bool live_p)
2088 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2089 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2090 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2092 if (vect_print_dump_info (REPORT_DETAILS))
2093 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2095 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2099 /* This is the last stmt in a sequence that was detected as a
2100 pattern that can potentially be vectorized. Don't mark the stmt
2101 as relevant/live because it's not going to vectorized.
2102 Instead mark the pattern-stmt that replaces it. */
2103 if (vect_print_dump_info (REPORT_DETAILS))
2104 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2105 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2106 stmt_info = vinfo_for_stmt (pattern_stmt);
2107 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2108 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2109 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2110 stmt = pattern_stmt;
2113 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2114 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2115 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2117 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2118 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2120 if (vect_print_dump_info (REPORT_DETAILS))
2121 fprintf (vect_dump, "already marked relevant/live.");
2125 VEC_safe_push (tree, heap, *worklist, stmt);
2129 /* Function vect_stmt_relevant_p.
2131 Return true if STMT in loop that is represented by LOOP_VINFO is
2132 "relevant for vectorization".
2134 A stmt is considered "relevant for vectorization" if:
2135 - it has uses outside the loop.
2136 - it has vdefs (it alters memory).
2137 - control stmts in the loop (except for the exit condition).
2139 CHECKME: what other side effects would the vectorizer allow? */
2142 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2143 enum vect_relevant *relevant, bool *live_p)
2145 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2146 ssa_op_iter op_iter;
2147 imm_use_iterator imm_iter;
2148 use_operand_p use_p;
2149 def_operand_p def_p;
2151 *relevant = vect_unused_in_loop;
2154 /* cond stmt other than loop exit cond. */
2155 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2156 *relevant = vect_used_in_loop;
2158 /* changing memory. */
2159 if (TREE_CODE (stmt) != PHI_NODE)
2160 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2162 if (vect_print_dump_info (REPORT_DETAILS))
2163 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2164 *relevant = vect_used_in_loop;
2167 /* uses outside the loop. */
2168 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2170 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2172 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2173 if (!flow_bb_inside_loop_p (loop, bb))
2175 if (vect_print_dump_info (REPORT_DETAILS))
2176 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2178 /* We expect all such uses to be in the loop exit phis
2179 (because of loop closed form) */
2180 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2181 gcc_assert (bb == single_exit (loop)->dest);
2188 return (*live_p || *relevant);
2193 Function process_use.
2196 - a USE in STMT in a loop represented by LOOP_VINFO
2197 - LIVE_P, RELEVANT - enum values to be set in the STMT_VINFO of the stmt
2198 that defined USE. This is dont by calling mark_relevant and passing it
2199 the WORKLIST (to add DEF_STMT to the WORKlist in case itis relevant).
2202 Generally, LIVE_P and RELEVANT are used to define the liveness and
2203 relevance info of the DEF_STMT of this USE:
2204 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2205 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2207 - case 1: If USE is used only for address computations (e.g. array indexing),
2208 which does not need to be directly vectorized, then the liveness/relevance
2209 of the respective DEF_STMT is left unchanged.
2210 - case 2: If STMT is a reduction phi and DEF_STMT is a reduction stmt, we
2211 skip DEF_STMT cause it had already been processed.
2213 Return true if everyting is as expected. Return false otherwise. */
2216 process_use (tree stmt, tree use, loop_vec_info loop_vinfo, bool live_p,
2217 enum vect_relevant relevant, VEC(tree,heap) **worklist)
2219 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2220 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2221 stmt_vec_info dstmt_vinfo;
2224 enum vect_def_type dt;
2226 /* case 1: we are only interested in uses that need to be vectorized. Uses
2227 that are used for address computation are not considered relevant. */
2228 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2231 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2233 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2234 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2238 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2241 def_bb = bb_for_stmt (def_stmt);
2242 if (!flow_bb_inside_loop_p (loop, def_bb))
2245 /* case 2: A reduction phi defining a reduction stmt (DEF_STMT). DEF_STMT
2246 must have already been processed, so we just check that everything is as
2247 expected, and we are done. */
2248 dstmt_vinfo = vinfo_for_stmt (def_stmt);
2249 if (TREE_CODE (stmt) == PHI_NODE
2250 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2251 && TREE_CODE (def_stmt) != PHI_NODE
2252 && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def)
2254 if (STMT_VINFO_IN_PATTERN_P (dstmt_vinfo))
2255 dstmt_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (dstmt_vinfo));
2256 gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction);
2257 gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo)
2258 || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_loop);
2262 vect_mark_relevant (worklist, def_stmt, relevant, live_p);
2267 /* Function vect_mark_stmts_to_be_vectorized.
2269 Not all stmts in the loop need to be vectorized. For example:
2278 Stmt 1 and 3 do not need to be vectorized, because loop control and
2279 addressing of vectorized data-refs are handled differently.
2281 This pass detects such stmts. */
2284 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2286 VEC(tree,heap) *worklist;
2287 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2288 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2289 unsigned int nbbs = loop->num_nodes;
2290 block_stmt_iterator si;
2294 stmt_vec_info stmt_vinfo;
2298 enum vect_relevant relevant;
2300 if (vect_print_dump_info (REPORT_DETAILS))
2301 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2303 worklist = VEC_alloc (tree, heap, 64);
2305 /* 1. Init worklist. */
2306 for (i = 0; i < nbbs; i++)
2309 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2311 if (vect_print_dump_info (REPORT_DETAILS))
2313 fprintf (vect_dump, "init: phi relevant? ");
2314 print_generic_expr (vect_dump, phi, TDF_SLIM);
2317 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2318 vect_mark_relevant (&worklist, phi, relevant, live_p);
2320 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2322 stmt = bsi_stmt (si);
2323 if (vect_print_dump_info (REPORT_DETAILS))
2325 fprintf (vect_dump, "init: stmt relevant? ");
2326 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2329 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2330 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2334 /* 2. Process_worklist */
2335 while (VEC_length (tree, worklist) > 0)
2337 use_operand_p use_p;
2340 stmt = VEC_pop (tree, worklist);
2341 if (vect_print_dump_info (REPORT_DETAILS))
2343 fprintf (vect_dump, "worklist: examine stmt: ");
2344 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2347 /* Examine the USEs of STMT. For each USE, mark the stmt that defines it
2348 (DEF_STMT) as relevant/irrelevant and live/dead according to the
2349 liveness and relevance properties of STMT. */
2350 ann = stmt_ann (stmt);
2351 stmt_vinfo = vinfo_for_stmt (stmt);
2352 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2353 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2355 /* Generally, the liveness and relevance properties of STMT are
2356 propagated as is to the DEF_STMTs of its USEs:
2357 live_p <-- STMT_VINFO_LIVE_P (STMT_VINFO)
2358 relevant <-- STMT_VINFO_RELEVANT (STMT_VINFO)
2360 One exception is when STMT has been identified as defining a reduction
2361 variable; in this case we set the liveness/relevance as follows:
2363 relevant = vect_used_by_reduction
2364 This is because we distinguish between two kinds of relevant stmts -
2365 those that are used by a reduction computation, and those that are
2366 (also) used by a regular computation. This allows us later on to
2367 identify stmts that are used solely by a reduction, and therefore the
2368 order of the results that they produce does not have to be kept.
2370 Reduction phis are expected to be used by a reduction stmt; Other
2371 reduction stmts are expected to be unused in the loop. These are the
2372 expected values of "relevant" for reduction phis/stmts in the loop:
2375 vect_unused_in_loop ok
2376 vect_used_by_reduction ok
2377 vect_used_in_loop */
2379 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2383 case vect_unused_in_loop:
2384 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2386 case vect_used_by_reduction:
2387 if (TREE_CODE (stmt) == PHI_NODE)
2389 case vect_used_in_loop:
2391 if (vect_print_dump_info (REPORT_DETAILS))
2392 fprintf (vect_dump, "unsupported use of reduction.");
2393 VEC_free (tree, heap, worklist);
2396 relevant = vect_used_by_reduction;
2400 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2402 tree op = USE_FROM_PTR (use_p);
2403 if (!process_use (stmt, op, loop_vinfo, live_p, relevant, &worklist))
2405 VEC_free (tree, heap, worklist);
2409 } /* while worklist */
2411 VEC_free (tree, heap, worklist);
2416 /* Function vect_can_advance_ivs_p
2418 In case the number of iterations that LOOP iterates is unknown at compile
2419 time, an epilog loop will be generated, and the loop induction variables
2420 (IVs) will be "advanced" to the value they are supposed to take just before
2421 the epilog loop. Here we check that the access function of the loop IVs
2422 and the expression that represents the loop bound are simple enough.
2423 These restrictions will be relaxed in the future. */
2426 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2428 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2429 basic_block bb = loop->header;
2432 /* Analyze phi functions of the loop header. */
2434 if (vect_print_dump_info (REPORT_DETAILS))
2435 fprintf (vect_dump, "vect_can_advance_ivs_p:");
2437 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2439 tree access_fn = NULL;
2440 tree evolution_part;
2442 if (vect_print_dump_info (REPORT_DETAILS))
2444 fprintf (vect_dump, "Analyze phi: ");
2445 print_generic_expr (vect_dump, phi, TDF_SLIM);
2448 /* Skip virtual phi's. The data dependences that are associated with
2449 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2451 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2453 if (vect_print_dump_info (REPORT_DETAILS))
2454 fprintf (vect_dump, "virtual phi. skip.");
2458 /* Skip reduction phis. */
2460 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2462 if (vect_print_dump_info (REPORT_DETAILS))
2463 fprintf (vect_dump, "reduc phi. skip.");
2467 /* Analyze the evolution function. */
2469 access_fn = instantiate_parameters
2470 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2474 if (vect_print_dump_info (REPORT_DETAILS))
2475 fprintf (vect_dump, "No Access function.");
2479 if (vect_print_dump_info (REPORT_DETAILS))
2481 fprintf (vect_dump, "Access function of PHI: ");
2482 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2485 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2487 if (evolution_part == NULL_TREE)
2489 if (vect_print_dump_info (REPORT_DETAILS))
2490 fprintf (vect_dump, "No evolution.");
2494 /* FORNOW: We do not transform initial conditions of IVs
2495 which evolution functions are a polynomial of degree >= 2. */
2497 if (tree_is_chrec (evolution_part))
2505 /* Function vect_get_loop_niters.
2507 Determine how many iterations the loop is executed.
2508 If an expression that represents the number of iterations
2509 can be constructed, place it in NUMBER_OF_ITERATIONS.
2510 Return the loop exit condition. */
2513 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2517 if (vect_print_dump_info (REPORT_DETAILS))
2518 fprintf (vect_dump, "=== get_loop_niters ===");
2520 niters = number_of_exit_cond_executions (loop);
2522 if (niters != NULL_TREE
2523 && niters != chrec_dont_know)
2525 *number_of_iterations = niters;
2527 if (vect_print_dump_info (REPORT_DETAILS))
2529 fprintf (vect_dump, "==> get_loop_niters:" );
2530 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2534 return get_loop_exit_condition (loop);
2538 /* Function vect_analyze_loop_form.
2540 Verify the following restrictions (some may be relaxed in the future):
2541 - it's an inner-most loop
2542 - number of BBs = 2 (which are the loop header and the latch)
2543 - the loop has a pre-header
2544 - the loop has a single entry and exit
2545 - the loop exit condition is simple enough, and the number of iterations
2546 can be analyzed (a countable loop). */
2548 static loop_vec_info
2549 vect_analyze_loop_form (struct loop *loop)
2551 loop_vec_info loop_vinfo;
2553 tree number_of_iterations = NULL;
2555 if (vect_print_dump_info (REPORT_DETAILS))
2556 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2560 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2561 fprintf (vect_dump, "not vectorized: nested loop.");
2565 if (!single_exit (loop)
2566 || loop->num_nodes != 2
2567 || EDGE_COUNT (loop->header->preds) != 2)
2569 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2571 if (!single_exit (loop))
2572 fprintf (vect_dump, "not vectorized: multiple exits.");
2573 else if (loop->num_nodes != 2)
2574 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2575 else if (EDGE_COUNT (loop->header->preds) != 2)
2576 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2582 /* We assume that the loop exit condition is at the end of the loop. i.e,
2583 that the loop is represented as a do-while (with a proper if-guard
2584 before the loop if needed), where the loop header contains all the
2585 executable statements, and the latch is empty. */
2586 if (!empty_block_p (loop->latch)
2587 || phi_nodes (loop->latch))
2589 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2590 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2594 /* Make sure there exists a single-predecessor exit bb: */
2595 if (!single_pred_p (single_exit (loop)->dest))
2597 edge e = single_exit (loop);
2598 if (!(e->flags & EDGE_ABNORMAL))
2600 split_loop_exit_edge (e);
2601 if (vect_print_dump_info (REPORT_DETAILS))
2602 fprintf (vect_dump, "split exit edge.");
2606 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2607 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2612 if (empty_block_p (loop->header))
2614 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2615 fprintf (vect_dump, "not vectorized: empty loop.");
2619 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2622 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2623 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2627 if (!number_of_iterations)
2629 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2631 "not vectorized: number of iterations cannot be computed.");
2635 if (chrec_contains_undetermined (number_of_iterations))
2637 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2638 fprintf (vect_dump, "Infinite number of iterations.");
2642 if (!NITERS_KNOWN_P (number_of_iterations))
2644 if (vect_print_dump_info (REPORT_DETAILS))
2646 fprintf (vect_dump, "Symbolic number of iterations is ");
2647 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2650 else if (TREE_INT_CST_LOW (number_of_iterations) == 0)
2652 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2653 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2657 loop_vinfo = new_loop_vec_info (loop);
2658 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2659 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2661 gcc_assert (!loop->aux);
2662 loop->aux = loop_vinfo;
2667 /* Function vect_analyze_loop.
2669 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2670 for it. The different analyses will record information in the
2671 loop_vec_info struct. */
2673 vect_analyze_loop (struct loop *loop)
2676 loop_vec_info loop_vinfo;
2678 if (vect_print_dump_info (REPORT_DETAILS))
2679 fprintf (vect_dump, "===== analyze_loop_nest =====");
2681 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2683 loop_vinfo = vect_analyze_loop_form (loop);
2686 if (vect_print_dump_info (REPORT_DETAILS))
2687 fprintf (vect_dump, "bad loop form.");
2691 /* Find all data references in the loop (which correspond to vdefs/vuses)
2692 and analyze their evolution in the loop.
2694 FORNOW: Handle only simple, array references, which
2695 alignment can be forced, and aligned pointer-references. */
2697 ok = vect_analyze_data_refs (loop_vinfo);
2700 if (vect_print_dump_info (REPORT_DETAILS))
2701 fprintf (vect_dump, "bad data references.");
2702 destroy_loop_vec_info (loop_vinfo);
2706 /* Classify all cross-iteration scalar data-flow cycles.
2707 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2709 vect_analyze_scalar_cycles (loop_vinfo);
2711 vect_pattern_recog (loop_vinfo);
2713 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2715 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2718 if (vect_print_dump_info (REPORT_DETAILS))
2719 fprintf (vect_dump, "unexpected pattern.");
2720 destroy_loop_vec_info (loop_vinfo);
2724 /* Analyze the alignment of the data-refs in the loop.
2725 Fail if a data reference is found that cannot be vectorized. */
2727 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2730 if (vect_print_dump_info (REPORT_DETAILS))
2731 fprintf (vect_dump, "bad data alignment.");
2732 destroy_loop_vec_info (loop_vinfo);
2736 ok = vect_determine_vectorization_factor (loop_vinfo);
2739 if (vect_print_dump_info (REPORT_DETAILS))
2740 fprintf (vect_dump, "can't determine vectorization factor.");
2741 destroy_loop_vec_info (loop_vinfo);
2745 /* Analyze data dependences between the data-refs in the loop.
2746 FORNOW: fail at the first data dependence that we encounter. */
2748 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2751 if (vect_print_dump_info (REPORT_DETAILS))
2752 fprintf (vect_dump, "bad data dependence.");
2753 destroy_loop_vec_info (loop_vinfo);
2757 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2758 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2760 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2763 if (vect_print_dump_info (REPORT_DETAILS))
2764 fprintf (vect_dump, "bad data access.");
2765 destroy_loop_vec_info (loop_vinfo);
2769 /* This pass will decide on using loop versioning and/or loop peeling in
2770 order to enhance the alignment of data references in the loop. */
2772 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2775 if (vect_print_dump_info (REPORT_DETAILS))
2776 fprintf (vect_dump, "bad data alignment.");
2777 destroy_loop_vec_info (loop_vinfo);
2781 /* Scan all the operations in the loop and make sure they are
2784 ok = vect_analyze_operations (loop_vinfo);
2787 if (vect_print_dump_info (REPORT_DETAILS))
2788 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2789 destroy_loop_vec_info (loop_vinfo);
2793 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;