1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
36 #include "tree-chrec.h"
37 #include "tree-data-ref.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
41 /* Main analysis functions. */
42 static loop_vec_info vect_analyze_loop_form (struct loop *);
43 static bool vect_analyze_data_refs (loop_vec_info);
44 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
45 static void vect_analyze_scalar_cycles (loop_vec_info);
46 static bool vect_analyze_data_ref_accesses (loop_vec_info);
47 static bool vect_analyze_data_ref_dependences (loop_vec_info);
48 static bool vect_analyze_data_refs_alignment (loop_vec_info);
49 static bool vect_compute_data_refs_alignment (loop_vec_info);
50 static void vect_enhance_data_refs_alignment (loop_vec_info);
51 static bool vect_analyze_operations (loop_vec_info);
52 static bool vect_determine_vectorization_factor (loop_vec_info);
54 /* Utility functions for the analyses. */
55 static bool exist_non_indexing_operands_for_use_p (tree, tree);
56 static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
57 static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
65 /* Function vect_determine_vectorization_factor
67 Determine the vectorization factor (VF). VF is the number of data elements
68 that are operated upon in parallel in a single iteration of the vectorized
69 loop. For example, when vectorizing a loop that operates on 4byte elements,
70 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
71 elements can fit in a single vector register.
73 We currently support vectorization of loops in which all types operated upon
74 are of the same size. Therefore this function currently sets VF according to
75 the size of the types operated upon, and fails if there are multiple sizes
78 VF is also the factor by which the loop iterations are strip-mined, e.g.:
85 for (i=0; i<N; i+=VF){
86 a[i:VF] = b[i:VF] + c[i:VF];
91 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
93 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
94 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
95 int nbbs = loop->num_nodes;
96 block_stmt_iterator si;
97 unsigned int vectorization_factor = 0;
101 if (vect_print_dump_info (REPORT_DETAILS))
102 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
104 for (i = 0; i < nbbs; i++)
106 basic_block bb = bbs[i];
108 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
110 tree stmt = bsi_stmt (si);
112 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
115 if (vect_print_dump_info (REPORT_DETAILS))
117 fprintf (vect_dump, "==> examining statement: ");
118 print_generic_expr (vect_dump, stmt, TDF_SLIM);
121 gcc_assert (stmt_info);
122 /* skip stmts which do not need to be vectorized. */
123 if (!STMT_VINFO_RELEVANT_P (stmt_info)
124 && !STMT_VINFO_LIVE_P (stmt_info))
126 if (vect_print_dump_info (REPORT_DETAILS))
127 fprintf (vect_dump, "skip.");
131 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
133 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
135 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
136 print_generic_expr (vect_dump, stmt, TDF_SLIM);
141 if (STMT_VINFO_DATA_REF (stmt_info))
142 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
143 else if (TREE_CODE (stmt) == MODIFY_EXPR)
144 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
146 scalar_type = TREE_TYPE (stmt);
148 if (vect_print_dump_info (REPORT_DETAILS))
150 fprintf (vect_dump, "get vectype for scalar type: ");
151 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
154 vectype = get_vectype_for_scalar_type (scalar_type);
157 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
159 fprintf (vect_dump, "not vectorized: unsupported data-type ");
160 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
164 if (vect_print_dump_info (REPORT_DETAILS))
166 fprintf (vect_dump, "vectype: ");
167 print_generic_expr (vect_dump, vectype, TDF_SLIM);
169 STMT_VINFO_VECTYPE (stmt_info) = vectype;
171 nunits = TYPE_VECTOR_SUBPARTS (vectype);
172 if (vect_print_dump_info (REPORT_DETAILS))
173 fprintf (vect_dump, "nunits = %d", nunits);
175 if (vectorization_factor)
177 /* FORNOW: don't allow mixed units.
178 This restriction will be relaxed in the future. */
179 if (nunits != vectorization_factor)
181 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
182 fprintf (vect_dump, "not vectorized: mixed data-types");
187 vectorization_factor = nunits;
189 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
190 * vectorization_factor == UNITS_PER_SIMD_WORD);
194 /* TODO: Analyze cost. Decide if worth while to vectorize. */
196 if (vectorization_factor <= 1)
198 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
199 fprintf (vect_dump, "not vectorized: unsupported data-type");
202 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
208 /* Function vect_analyze_operations.
210 Scan the loop stmts and make sure they are all vectorizable. */
213 vect_analyze_operations (loop_vec_info loop_vinfo)
215 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
216 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
217 int nbbs = loop->num_nodes;
218 block_stmt_iterator si;
219 unsigned int vectorization_factor = 0;
223 stmt_vec_info stmt_info;
224 bool need_to_vectorize = false;
226 if (vect_print_dump_info (REPORT_DETAILS))
227 fprintf (vect_dump, "=== vect_analyze_operations ===");
229 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
230 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
232 for (i = 0; i < nbbs; i++)
234 basic_block bb = bbs[i];
236 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
238 stmt_info = vinfo_for_stmt (phi);
239 if (vect_print_dump_info (REPORT_DETAILS))
241 fprintf (vect_dump, "examining phi: ");
242 print_generic_expr (vect_dump, phi, TDF_SLIM);
245 gcc_assert (stmt_info);
247 if (STMT_VINFO_LIVE_P (stmt_info))
249 /* FORNOW: not yet supported. */
250 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
251 fprintf (vect_dump, "not vectorized: value used after loop.");
255 if (STMT_VINFO_RELEVANT_P (stmt_info))
257 /* Most likely a reduction-like computation that is used
259 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
260 fprintf (vect_dump, "not vectorized: unsupported pattern.");
265 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
267 tree stmt = bsi_stmt (si);
268 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
270 if (vect_print_dump_info (REPORT_DETAILS))
272 fprintf (vect_dump, "==> examining statement: ");
273 print_generic_expr (vect_dump, stmt, TDF_SLIM);
276 gcc_assert (stmt_info);
278 /* skip stmts which do not need to be vectorized.
279 this is expected to include:
280 - the COND_EXPR which is the loop exit condition
281 - any LABEL_EXPRs in the loop
282 - computations that are used only for array indexing or loop
285 if (!STMT_VINFO_RELEVANT_P (stmt_info)
286 && !STMT_VINFO_LIVE_P (stmt_info))
288 if (vect_print_dump_info (REPORT_DETAILS))
289 fprintf (vect_dump, "irrelevant.");
293 if (STMT_VINFO_RELEVANT_P (stmt_info))
295 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
296 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
298 ok = (vectorizable_operation (stmt, NULL, NULL)
299 || vectorizable_assignment (stmt, NULL, NULL)
300 || vectorizable_load (stmt, NULL, NULL)
301 || vectorizable_store (stmt, NULL, NULL)
302 || vectorizable_condition (stmt, NULL, NULL));
306 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
309 "not vectorized: relevant stmt not supported: ");
310 print_generic_expr (vect_dump, stmt, TDF_SLIM);
314 need_to_vectorize = true;
317 if (STMT_VINFO_LIVE_P (stmt_info))
319 ok = vectorizable_reduction (stmt, NULL, NULL);
322 need_to_vectorize = true;
324 ok = vectorizable_live_operation (stmt, NULL, NULL);
328 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
331 "not vectorized: live stmt not supported: ");
332 print_generic_expr (vect_dump, stmt, TDF_SLIM);
340 /* TODO: Analyze cost. Decide if worth while to vectorize. */
342 /* All operations in the loop are either irrelevant (deal with loop
343 control, or dead), or only used outside the loop and can be moved
344 out of the loop (e.g. invariants, inductions). The loop can be
345 optimized away by scalar optimizations. We're better off not
346 touching this loop. */
347 if (!need_to_vectorize)
349 if (vect_print_dump_info (REPORT_DETAILS))
351 "All the computation can be taken out of the loop.");
352 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
354 "not vectorized: redundant loop. no profit to vectorize.");
358 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
359 && vect_print_dump_info (REPORT_DETAILS))
361 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
362 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
364 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
365 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
367 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
368 fprintf (vect_dump, "not vectorized: iteration count too small.");
372 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
373 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
375 if (vect_print_dump_info (REPORT_DETAILS))
376 fprintf (vect_dump, "epilog loop required.");
377 if (!vect_can_advance_ivs_p (loop_vinfo))
379 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
381 "not vectorized: can't create epilog loop 1.");
384 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
386 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
388 "not vectorized: can't create epilog loop 2.");
397 /* Function exist_non_indexing_operands_for_use_p
399 USE is one of the uses attached to STMT. Check if USE is
400 used in STMT for anything other than indexing an array. */
403 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
406 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
408 /* USE corresponds to some operand in STMT. If there is no data
409 reference in STMT, then any operand that corresponds to USE
410 is not indexing an array. */
411 if (!STMT_VINFO_DATA_REF (stmt_info))
414 /* STMT has a data_ref. FORNOW this means that its of one of
418 (This should have been verified in analyze_data_refs).
420 'var' in the second case corresponds to a def, not a use,
421 so USE cannot correspond to any operands that are not used
424 Therefore, all we need to check is if STMT falls into the
425 first case, and whether var corresponds to USE. */
427 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
430 operand = TREE_OPERAND (stmt, 1);
432 if (TREE_CODE (operand) != SSA_NAME)
442 /* Function vect_analyze_scalar_cycles.
444 Examine the cross iteration def-use cycles of scalar variables, by
445 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
446 following: invariant, induction, reduction, unknown.
448 Some forms of scalar cycles are not yet supported.
450 Example1: reduction: (unsupported yet)
456 Example2: induction: (unsupported yet)
462 Note: the following loop *is* vectorizable:
468 even though it has a def-use cycle caused by the induction variable i:
470 loop: i_2 = PHI (i_0, i_1)
475 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
476 it does not need to be vectorized because it is only used for array
477 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
478 loop2 on the other hand is relevant (it is being written to memory).
482 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
485 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
486 basic_block bb = loop->header;
489 if (vect_print_dump_info (REPORT_DETAILS))
490 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
492 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
494 tree access_fn = NULL;
495 tree def = PHI_RESULT (phi);
496 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
499 if (vect_print_dump_info (REPORT_DETAILS))
501 fprintf (vect_dump, "Analyze phi: ");
502 print_generic_expr (vect_dump, phi, TDF_SLIM);
505 /* Skip virtual phi's. The data dependences that are associated with
506 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
508 if (!is_gimple_reg (SSA_NAME_VAR (def)))
510 if (vect_print_dump_info (REPORT_DETAILS))
511 fprintf (vect_dump, "virtual phi. skip.");
515 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
517 /* Analyze the evolution function. */
519 access_fn = analyze_scalar_evolution (loop, def);
524 if (vect_print_dump_info (REPORT_DETAILS))
526 fprintf (vect_dump, "Access function of PHI: ");
527 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
530 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
532 if (vect_print_dump_info (REPORT_DETAILS))
533 fprintf (vect_dump, "Detected induction.");
534 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
538 /* TODO: handle invariant phis */
540 reduc_stmt = vect_is_simple_reduction (loop, phi);
543 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump, "Detected reduction.");
545 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
546 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
550 if (vect_print_dump_info (REPORT_DETAILS))
551 fprintf (vect_dump, "Unknown def-use cycle pattern.");
559 /* Function vect_analyze_data_ref_dependence.
561 Return TRUE if there (might) exist a dependence between a memory-reference
562 DRA and a memory-reference DRB. */
565 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
566 loop_vec_info loop_vinfo)
568 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
569 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
571 unsigned int loop_depth = 0;
572 struct loop *loop_nest = loop;
573 struct data_reference *dra = DDR_A (ddr);
574 struct data_reference *drb = DDR_B (ddr);
575 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
576 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
578 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
581 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
583 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
586 "not vectorized: can't determine dependence between ");
587 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
588 fprintf (vect_dump, " and ");
589 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
594 if (!DDR_DIST_VECT (ddr))
596 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
598 fprintf (vect_dump, "not vectorized: bad dist vector for ");
599 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
600 fprintf (vect_dump, " and ");
601 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
606 /* Find loop depth. */
607 while (loop_nest && loop_nest->outer && loop_nest->outer->outer)
609 loop_nest = loop_nest->outer;
613 dist = DDR_DIST_VECT (ddr)[loop_depth];
614 if (vect_print_dump_info (REPORT_DR_DETAILS))
615 fprintf (vect_dump, "dependence distance = %d.",dist);
617 /* Same loop iteration. */
618 if (dist % vectorization_factor == 0)
620 /* Two references with distance zero have the same alignment. */
621 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
622 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
623 if (vect_print_dump_info (REPORT_ALIGNMENT))
624 fprintf (vect_dump, "accesses have the same alignment.");
625 if (vect_print_dump_info (REPORT_DR_DETAILS))
627 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
628 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
629 fprintf (vect_dump, " and ");
630 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
635 if (abs (dist) >= vectorization_factor)
637 /* Dependence distance does not create dependence, as far as vectorization
638 is concerned, in this case. */
639 if (vect_print_dump_info (REPORT_DR_DETAILS))
640 fprintf (vect_dump, "dependence distance >= VF.");
644 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
647 "not vectorized: possible dependence between data-refs ");
648 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
649 fprintf (vect_dump, " and ");
650 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
657 /* Function vect_analyze_data_ref_dependences.
659 Examine all the data references in the loop, and make sure there do not
660 exist any data dependences between them. */
663 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
666 varray_type ddrs = LOOP_VINFO_DDRS (loop_vinfo);
668 if (vect_print_dump_info (REPORT_DETAILS))
669 fprintf (vect_dump, "=== vect_analyze_dependences ===");
671 for (i = 0; i < VARRAY_ACTIVE_SIZE (ddrs); i++)
673 struct data_dependence_relation *ddr = VARRAY_GENERIC_PTR (ddrs, i);
675 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
683 /* Function vect_compute_data_ref_alignment
685 Compute the misalignment of the data reference DR.
688 1. If during the misalignment computation it is found that the data reference
689 cannot be vectorized then false is returned.
690 2. DR_MISALIGNMENT (DR) is defined.
692 FOR NOW: No analysis is actually performed. Misalignment is calculated
693 only for trivial cases. TODO. */
696 vect_compute_data_ref_alignment (struct data_reference *dr)
698 tree stmt = DR_STMT (dr);
699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
700 tree ref = DR_REF (dr);
702 tree base, base_addr;
705 tree aligned_to, alignment;
707 if (vect_print_dump_info (REPORT_DETAILS))
708 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
710 /* Initialize misalignment to unknown. */
711 DR_MISALIGNMENT (dr) = -1;
713 misalign = DR_OFFSET_MISALIGNMENT (dr);
714 aligned_to = DR_ALIGNED_TO (dr);
715 base_addr = DR_BASE_ADDRESS (dr);
716 base = build_fold_indirect_ref (base_addr);
717 vectype = STMT_VINFO_VECTYPE (stmt_info);
718 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
720 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
723 if (vect_print_dump_info (REPORT_DETAILS))
725 fprintf (vect_dump, "Unknown alignment for access: ");
726 print_generic_expr (vect_dump, base, TDF_SLIM);
732 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
734 || (TREE_CODE (base_addr) == SSA_NAME
735 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
736 TREE_TYPE (base_addr)))),
740 base_aligned = false;
744 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
746 if (vect_print_dump_info (REPORT_DETAILS))
748 fprintf (vect_dump, "can't force alignment of ref: ");
749 print_generic_expr (vect_dump, ref, TDF_SLIM);
754 /* Force the alignment of the decl.
755 NOTE: This is the only change to the code we make during
756 the analysis phase, before deciding to vectorize the loop. */
757 if (vect_print_dump_info (REPORT_DETAILS))
758 fprintf (vect_dump, "force alignment");
759 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
760 DECL_USER_ALIGN (base) = 1;
763 /* At this point we assume that the base is aligned. */
764 gcc_assert (base_aligned
765 || (TREE_CODE (base) == VAR_DECL
766 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
768 /* Modulo alignment. */
769 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
771 if (tree_int_cst_sgn (misalign) < 0)
773 /* Negative misalignment value. */
774 if (vect_print_dump_info (REPORT_DETAILS))
775 fprintf (vect_dump, "unexpected misalign value");
779 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
781 if (vect_print_dump_info (REPORT_DETAILS))
783 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
784 print_generic_expr (vect_dump, ref, TDF_SLIM);
791 /* Function vect_compute_data_refs_alignment
793 Compute the misalignment of data references in the loop.
794 This pass may take place at function granularity instead of at loop
797 FOR NOW: No analysis is actually performed. Misalignment is calculated
798 only for trivial cases. TODO. */
801 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
803 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
806 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
808 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
809 if (!vect_compute_data_ref_alignment (dr))
817 /* Function vect_enhance_data_refs_alignment
819 This pass will use loop versioning and loop peeling in order to enhance
820 the alignment of data references in the loop.
822 FOR NOW: we assume that whatever versioning/peeling takes place, only the
823 original loop is to be vectorized; Any other loops that are created by
824 the transformations performed in this pass - are not supposed to be
825 vectorized. This restriction will be relaxed. */
828 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
830 varray_type loop_datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
831 varray_type datarefs;
832 VEC(dr_p,heap) *same_align_drs;
833 struct data_reference *dr0 = NULL;
834 struct data_reference *dr;
839 This pass will require a cost model to guide it whether to apply peeling
840 or versioning or a combination of the two. For example, the scheme that
841 intel uses when given a loop with several memory accesses, is as follows:
842 choose one memory access ('p') which alignment you want to force by doing
843 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
844 other accesses are not necessarily aligned, or (2) use loop versioning to
845 generate one loop in which all accesses are aligned, and another loop in
846 which only 'p' is necessarily aligned.
848 ("Automatic Intra-Register Vectorization for the Intel Architecture",
849 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
850 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
852 Devising a cost model is the most critical aspect of this work. It will
853 guide us on which access to peel for, whether to use loop versioning, how
854 many versions to create, etc. The cost model will probably consist of
855 generic considerations as well as target specific considerations (on
856 powerpc for example, misaligned stores are more painful than misaligned
859 Here is the general steps involved in alignment enhancements:
861 -- original loop, before alignment analysis:
863 x = q[i]; # DR_MISALIGNMENT(q) = unknown
864 p[i] = y; # DR_MISALIGNMENT(p) = unknown
867 -- After vect_compute_data_refs_alignment:
869 x = q[i]; # DR_MISALIGNMENT(q) = 3
870 p[i] = y; # DR_MISALIGNMENT(p) = unknown
873 -- Possibility 1: we do loop versioning:
875 for (i=0; i<N; i++){ # loop 1A
876 x = q[i]; # DR_MISALIGNMENT(q) = 3
877 p[i] = y; # DR_MISALIGNMENT(p) = 0
881 for (i=0; i<N; i++){ # loop 1B
882 x = q[i]; # DR_MISALIGNMENT(q) = 3
883 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
887 -- Possibility 2: we do loop peeling:
888 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
892 for (i = 3; i < N; i++){ # loop 2A
893 x = q[i]; # DR_MISALIGNMENT(q) = 0
894 p[i] = y; # DR_MISALIGNMENT(p) = unknown
897 -- Possibility 3: combination of loop peeling and versioning:
898 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
903 for (i = 3; i<N; i++){ # loop 3A
904 x = q[i]; # DR_MISALIGNMENT(q) = 0
905 p[i] = y; # DR_MISALIGNMENT(p) = 0
909 for (i = 3; i<N; i++){ # loop 3B
910 x = q[i]; # DR_MISALIGNMENT(q) = 0
911 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
915 These loops are later passed to loop_transform to be vectorized. The
916 vectorizer will use the alignment information to guide the transformation
917 (whether to generate regular loads/stores, or with special handling for
921 /* (1) Peeling to force alignment. */
923 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
925 + How many accesses will become aligned due to the peeling
926 - How many accesses will become unaligned due to the peeling,
927 and the cost of misaligned accesses.
928 - The cost of peeling (the extra runtime checks, the increase
931 The scheme we use FORNOW: peel to force the alignment of the first
932 misaligned store in the loop.
933 Rationale: misaligned stores are not yet supported.
935 TODO: Use a better cost model. */
937 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_datarefs); i++)
939 dr0 = VARRAY_GENERIC_PTR (loop_datarefs, i);
940 if (!DR_IS_READ (dr0) && !aligned_access_p (dr0))
942 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
943 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
948 /* (1.2) Update the alignment info according to the peeling factor.
949 If the misalignment of the DR we peel for is M, then the
950 peeling factor is VF - M, and the misalignment of each access DR_i
951 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
952 If the misalignment of the DR we peel for is unknown, then the
953 misalignment of each access DR_i in the loop is also unknown.
955 TODO: - consider accesses that are known to have the same
956 alignment, even if that alignment is unknown. */
958 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
963 if (known_alignment_for_access_p (dr0))
965 /* Since it's known at compile time, compute the number of iterations
966 in the peeled loop (the peeling factor) for use in updating
967 DR_MISALIGNMENT values. The peeling factor is the vectorization
968 factor minus the misalignment as an element count. */
969 mis = DR_MISALIGNMENT (dr0);
970 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
971 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
974 datarefs = loop_datarefs;
976 for (j = 0; j < 2; j++)
978 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
980 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
982 if (dr == dr0 || (!check_loads && DR_IS_READ (dr)))
984 if (known_alignment_for_access_p (dr)
985 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
986 DR_MISALIGNMENT (dr) = 0;
987 else if (known_alignment_for_access_p (dr)
988 && known_alignment_for_access_p (dr0))
991 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
993 DR_MISALIGNMENT (dr) += npeel * drsize;
994 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
997 DR_MISALIGNMENT (dr) = -1;
1003 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr0)));
1004 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, dr); i++)
1006 DR_MISALIGNMENT (dr) = 0;
1009 DR_MISALIGNMENT (dr0) = 0;
1014 /* Function vect_analyze_data_refs_alignment
1016 Analyze the alignment of the data-references in the loop.
1017 FOR NOW: Until support for misaligned accesses is in place, only if all
1018 accesses are aligned can the loop be vectorized. This restriction will be
1022 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1024 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1025 enum dr_alignment_support supportable_dr_alignment;
1028 if (vect_print_dump_info (REPORT_DETAILS))
1029 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1032 /* This pass may take place at function granularity instead of at loop
1035 if (!vect_compute_data_refs_alignment (loop_vinfo))
1037 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1039 "not vectorized: can't calculate alignment for data ref.");
1044 /* This pass will decide on using loop versioning and/or loop peeling in
1045 order to enhance the alignment of data references in the loop. */
1047 vect_enhance_data_refs_alignment (loop_vinfo);
1050 /* Finally, check that all the data references in the loop can be
1051 handled with respect to their alignment. */
1053 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1055 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1056 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1057 if (!supportable_dr_alignment)
1059 if (DR_IS_READ (dr))
1061 "not vectorized: unsupported unaligned load.");
1064 "not vectorized: unsupported unaligned store.");
1067 if (supportable_dr_alignment != dr_aligned
1068 && (vect_print_dump_info (REPORT_ALIGNMENT)))
1069 fprintf (vect_dump, "Vectorizing an unaligned access.");
1071 if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
1072 && vect_print_dump_info (REPORT_ALIGNMENT))
1073 fprintf (vect_dump, "Alignment of access forced using peeling.");
1078 /* Function vect_analyze_data_ref_access.
1080 Analyze the access pattern of the data-reference DR. For now, a data access
1081 has to consecutive to be considered vectorizable. */
1084 vect_analyze_data_ref_access (struct data_reference *dr)
1086 tree step = DR_STEP (dr);
1087 tree scalar_type = TREE_TYPE (DR_REF (dr));
1089 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1091 if (vect_print_dump_info (REPORT_DETAILS))
1092 fprintf (vect_dump, "not consecutive access");
1099 /* Function vect_analyze_data_ref_accesses.
1101 Analyze the access pattern of all the data references in the loop.
1103 FORNOW: the only access pattern that is considered vectorizable is a
1104 simple step 1 (consecutive) access.
1106 FORNOW: handle only arrays and pointer accesses. */
1109 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1112 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1114 if (vect_print_dump_info (REPORT_DETAILS))
1115 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1117 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1119 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1120 if (!vect_analyze_data_ref_access (dr))
1122 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1123 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1132 /* Function vect_analyze_data_refs.
1134 Find all the data references in the loop.
1136 The general structure of the analysis of data refs in the vectorizer is as
1138 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1139 find and analyze all data-refs in the loop and their dependences.
1140 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1141 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1142 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1147 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1149 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1151 varray_type datarefs;
1154 if (vect_print_dump_info (REPORT_DETAILS))
1155 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1157 compute_data_dependences_for_loop (loop, false,
1158 &(LOOP_VINFO_DATAREFS (loop_vinfo)),
1159 &(LOOP_VINFO_DDRS (loop_vinfo)));
1161 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1162 from stmt_vec_info struct to DR and vectype. */
1163 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1164 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1166 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1168 stmt_vec_info stmt_info;
1170 if (!dr || !DR_REF (dr))
1172 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1173 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1177 /* Update DR field in stmt_vec_info struct. */
1178 stmt = DR_STMT (dr);
1179 stmt_info = vinfo_for_stmt (stmt);
1181 if (STMT_VINFO_DATA_REF (stmt_info))
1183 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1186 "not vectorized: more than one data ref in stmt: ");
1187 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1191 STMT_VINFO_DATA_REF (stmt_info) = dr;
1193 /* Check that analysis of the data-ref succeeded. */
1194 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1197 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1199 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1200 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1204 if (!DR_MEMTAG (dr))
1206 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1208 fprintf (vect_dump, "not vectorized: no memory tag for ");
1209 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1214 /* Set vectype for STMT. */
1215 scalar_type = TREE_TYPE (DR_REF (dr));
1216 STMT_VINFO_VECTYPE (stmt_info) =
1217 get_vectype_for_scalar_type (scalar_type);
1218 if (!STMT_VINFO_VECTYPE (stmt_info))
1220 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1223 "not vectorized: no vectype for stmt: ");
1224 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1225 fprintf (vect_dump, " scalar_type: ");
1226 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1236 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1238 /* Function vect_mark_relevant.
1240 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1243 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1244 bool relevant_p, bool live_p)
1246 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1247 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
1248 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1250 if (vect_print_dump_info (REPORT_DETAILS))
1251 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
1253 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
1254 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
1256 if (TREE_CODE (stmt) == PHI_NODE)
1257 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
1258 or live will fail vectorization later on. */
1261 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
1262 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
1264 if (vect_print_dump_info (REPORT_DETAILS))
1265 fprintf (vect_dump, "already marked relevant/live.");
1269 VEC_safe_push (tree, heap, *worklist, stmt);
1273 /* Function vect_stmt_relevant_p.
1275 Return true if STMT in loop that is represented by LOOP_VINFO is
1276 "relevant for vectorization".
1278 A stmt is considered "relevant for vectorization" if:
1279 - it has uses outside the loop.
1280 - it has vdefs (it alters memory).
1281 - control stmts in the loop (except for the exit condition).
1283 CHECKME: what other side effects would the vectorizer allow? */
1286 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
1287 bool *relevant_p, bool *live_p)
1289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1290 ssa_op_iter op_iter;
1291 imm_use_iterator imm_iter;
1292 use_operand_p use_p;
1293 def_operand_p def_p;
1295 *relevant_p = false;
1298 /* cond stmt other than loop exit cond. */
1299 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1302 /* changing memory. */
1303 if (TREE_CODE (stmt) != PHI_NODE)
1304 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
1306 if (vect_print_dump_info (REPORT_DETAILS))
1307 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1311 /* uses outside the loop. */
1312 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
1314 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
1316 basic_block bb = bb_for_stmt (USE_STMT (use_p));
1317 if (!flow_bb_inside_loop_p (loop, bb))
1319 if (vect_print_dump_info (REPORT_DETAILS))
1320 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1322 /* We expect all such uses to be in the loop exit phis
1323 (because of loop closed form) */
1324 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
1325 gcc_assert (bb == loop->single_exit->dest);
1332 return (*live_p || *relevant_p);
1336 /* Function vect_mark_stmts_to_be_vectorized.
1338 Not all stmts in the loop need to be vectorized. For example:
1347 Stmt 1 and 3 do not need to be vectorized, because loop control and
1348 addressing of vectorized data-refs are handled differently.
1350 This pass detects such stmts. */
1353 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
1355 VEC(tree,heap) *worklist;
1356 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1357 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1358 unsigned int nbbs = loop->num_nodes;
1359 block_stmt_iterator si;
1364 stmt_vec_info stmt_vinfo;
1367 bool relevant_p, live_p;
1369 enum vect_def_type dt;
1371 if (vect_print_dump_info (REPORT_DETAILS))
1372 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
1374 worklist = VEC_alloc (tree, heap, 64);
1376 /* 1. Init worklist. */
1379 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1381 if (vect_print_dump_info (REPORT_DETAILS))
1383 fprintf (vect_dump, "init: phi relevant? ");
1384 print_generic_expr (vect_dump, phi, TDF_SLIM);
1387 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
1388 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
1391 for (i = 0; i < nbbs; i++)
1394 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1396 stmt = bsi_stmt (si);
1398 if (vect_print_dump_info (REPORT_DETAILS))
1400 fprintf (vect_dump, "init: stmt relevant? ");
1401 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1404 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
1405 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
1410 /* 2. Process_worklist */
1412 while (VEC_length (tree, worklist) > 0)
1414 stmt = VEC_pop (tree, worklist);
1416 if (vect_print_dump_info (REPORT_DETAILS))
1418 fprintf (vect_dump, "worklist: examine stmt: ");
1419 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1422 /* Examine the USEs of STMT. For each ssa-name USE thta is defined
1423 in the loop, mark the stmt that defines it (DEF_STMT) as
1424 relevant/irrelevant and live/dead according to the liveness and
1425 relevance properties of STMT.
1428 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
1430 ann = stmt_ann (stmt);
1431 stmt_vinfo = vinfo_for_stmt (stmt);
1433 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
1434 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
1436 /* Generally, the liveness and relevance properties of STMT are
1437 propagated to the DEF_STMTs of its USEs:
1438 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
1439 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
1444 If USE is used only for address computations (e.g. array indexing),
1445 which does not need to be directly vectorized, then the
1446 liveness/relevance of the respective DEF_STMT is left unchanged.
1449 If STMT has been identified as defining a reduction variable, then
1452 The last use of STMT is the reduction-variable, which is defined
1453 by a loop-header-phi. We don't want to mark the phi as live or
1454 relevant (because it does not need to be vectorized, it is handled
1455 as part of the vectorization of the reduction), so in this case we
1456 skip the call to vect_mark_relevant.
1458 The rest of the uses of STMT are defined in the loop body. For
1459 the def_stmt of these uses we want to set liveness/relevance
1461 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
1462 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
1463 because even though STMT is classified as live (since it defines a
1464 value that is used across loop iterations) and irrelevant (since it
1465 is not used inside the loop), it will be vectorized, and therefore
1466 the corresponding DEF_STMTs need to marked as relevant.
1470 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
1472 gcc_assert (!relevant_p && live_p);
1477 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
1479 /* case 1: we are only interested in uses that need to be vectorized.
1480 Uses that are used for address computation are not considered
1483 if (!exist_non_indexing_operands_for_use_p (use, stmt))
1486 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
1488 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1489 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
1490 VEC_free (tree, heap, worklist);
1494 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
1497 if (vect_print_dump_info (REPORT_DETAILS))
1499 fprintf (vect_dump, "worklist: examine use %d: ", i);
1500 print_generic_expr (vect_dump, use, TDF_SLIM);
1503 bb = bb_for_stmt (def_stmt);
1504 if (!flow_bb_inside_loop_p (loop, bb))
1507 /* case 2.1: the reduction-use does not mark the defining-phi
1509 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
1510 && TREE_CODE (def_stmt) == PHI_NODE)
1513 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
1515 } /* while worklist */
1517 VEC_free (tree, heap, worklist);
1522 /* Function vect_can_advance_ivs_p
1524 In case the number of iterations that LOOP iterates in unknown at compile
1525 time, an epilog loop will be generated, and the loop induction variables
1526 (IVs) will be "advanced" to the value they are supposed to take just before
1527 the epilog loop. Here we check that the access function of the loop IVs
1528 and the expression that represents the loop bound are simple enough.
1529 These restrictions will be relaxed in the future. */
1532 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
1534 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1535 basic_block bb = loop->header;
1538 /* Analyze phi functions of the loop header. */
1540 if (vect_print_dump_info (REPORT_DETAILS))
1541 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
1543 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1545 tree access_fn = NULL;
1546 tree evolution_part;
1548 if (vect_print_dump_info (REPORT_DETAILS))
1550 fprintf (vect_dump, "Analyze phi: ");
1551 print_generic_expr (vect_dump, phi, TDF_SLIM);
1554 /* Skip virtual phi's. The data dependences that are associated with
1555 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
1557 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1559 if (vect_print_dump_info (REPORT_DETAILS))
1560 fprintf (vect_dump, "virtual phi. skip.");
1564 /* Skip reduction phis. */
1566 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
1568 if (vect_print_dump_info (REPORT_DETAILS))
1569 fprintf (vect_dump, "reduc phi. skip.");
1573 /* Analyze the evolution function. */
1575 access_fn = instantiate_parameters
1576 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
1580 if (vect_print_dump_info (REPORT_DETAILS))
1581 fprintf (vect_dump, "No Access function.");
1585 if (vect_print_dump_info (REPORT_DETAILS))
1587 fprintf (vect_dump, "Access function of PHI: ");
1588 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1591 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1593 if (evolution_part == NULL_TREE)
1595 if (vect_print_dump_info (REPORT_DETAILS))
1596 fprintf (vect_dump, "No evolution.");
1600 /* FORNOW: We do not transform initial conditions of IVs
1601 which evolution functions are a polynomial of degree >= 2. */
1603 if (tree_is_chrec (evolution_part))
1611 /* Function vect_get_loop_niters.
1613 Determine how many iterations the loop is executed.
1614 If an expression that represents the number of iterations
1615 can be constructed, place it in NUMBER_OF_ITERATIONS.
1616 Return the loop exit condition. */
1619 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
1623 if (vect_print_dump_info (REPORT_DETAILS))
1624 fprintf (vect_dump, "=== get_loop_niters ===");
1626 niters = number_of_iterations_in_loop (loop);
1628 if (niters != NULL_TREE
1629 && niters != chrec_dont_know)
1631 *number_of_iterations = niters;
1633 if (vect_print_dump_info (REPORT_DETAILS))
1635 fprintf (vect_dump, "==> get_loop_niters:" );
1636 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
1640 return get_loop_exit_condition (loop);
1644 /* Function vect_analyze_loop_form.
1646 Verify the following restrictions (some may be relaxed in the future):
1647 - it's an inner-most loop
1648 - number of BBs = 2 (which are the loop header and the latch)
1649 - the loop has a pre-header
1650 - the loop has a single entry and exit
1651 - the loop exit condition is simple enough, and the number of iterations
1652 can be analyzed (a countable loop). */
1654 static loop_vec_info
1655 vect_analyze_loop_form (struct loop *loop)
1657 loop_vec_info loop_vinfo;
1659 tree number_of_iterations = NULL;
1661 if (vect_print_dump_info (REPORT_DETAILS))
1662 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
1666 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
1667 fprintf (vect_dump, "not vectorized: nested loop.");
1671 if (!loop->single_exit
1672 || loop->num_nodes != 2
1673 || EDGE_COUNT (loop->header->preds) != 2)
1675 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1677 if (!loop->single_exit)
1678 fprintf (vect_dump, "not vectorized: multiple exits.");
1679 else if (loop->num_nodes != 2)
1680 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
1681 else if (EDGE_COUNT (loop->header->preds) != 2)
1682 fprintf (vect_dump, "not vectorized: too many incoming edges.");
1688 /* We assume that the loop exit condition is at the end of the loop. i.e,
1689 that the loop is represented as a do-while (with a proper if-guard
1690 before the loop if needed), where the loop header contains all the
1691 executable statements, and the latch is empty. */
1692 if (!empty_block_p (loop->latch))
1694 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1695 fprintf (vect_dump, "not vectorized: unexpected loop form.");
1699 /* Make sure there exists a single-predecessor exit bb: */
1700 if (!single_pred_p (loop->single_exit->dest))
1702 edge e = loop->single_exit;
1703 if (!(e->flags & EDGE_ABNORMAL))
1705 split_loop_exit_edge (e);
1706 if (vect_print_dump_info (REPORT_DETAILS))
1707 fprintf (vect_dump, "split exit edge.");
1711 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1712 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
1717 if (empty_block_p (loop->header))
1719 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1720 fprintf (vect_dump, "not vectorized: empty loop.");
1724 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
1727 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1728 fprintf (vect_dump, "not vectorized: complicated exit condition.");
1732 if (!number_of_iterations)
1734 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1736 "not vectorized: number of iterations cannot be computed.");
1740 if (chrec_contains_undetermined (number_of_iterations))
1742 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
1743 fprintf (vect_dump, "Infinite number of iterations.");
1747 loop_vinfo = new_loop_vec_info (loop);
1748 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1750 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1752 if (vect_print_dump_info (REPORT_DETAILS))
1754 fprintf (vect_dump, "Symbolic number of iterations is ");
1755 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
1759 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
1761 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1762 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
1766 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
1772 /* Function vect_analyze_loop.
1774 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1775 for it. The different analyses will record information in the
1776 loop_vec_info struct. */
1778 vect_analyze_loop (struct loop *loop)
1781 loop_vec_info loop_vinfo;
1783 if (vect_print_dump_info (REPORT_DETAILS))
1784 fprintf (vect_dump, "===== analyze_loop_nest =====");
1786 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
1788 loop_vinfo = vect_analyze_loop_form (loop);
1791 if (vect_print_dump_info (REPORT_DETAILS))
1792 fprintf (vect_dump, "bad loop form.");
1796 /* Find all data references in the loop (which correspond to vdefs/vuses)
1797 and analyze their evolution in the loop.
1799 FORNOW: Handle only simple, array references, which
1800 alignment can be forced, and aligned pointer-references. */
1802 ok = vect_analyze_data_refs (loop_vinfo);
1805 if (vect_print_dump_info (REPORT_DETAILS))
1806 fprintf (vect_dump, "bad data references.");
1807 destroy_loop_vec_info (loop_vinfo);
1811 /* Classify all cross-iteration scalar data-flow cycles.
1812 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1814 vect_analyze_scalar_cycles (loop_vinfo);
1816 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1818 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1821 if (vect_print_dump_info (REPORT_DETAILS))
1822 fprintf (vect_dump, "unexpected pattern.");
1823 destroy_loop_vec_info (loop_vinfo);
1827 ok = vect_determine_vectorization_factor (loop_vinfo);
1830 if (vect_print_dump_info (REPORT_DETAILS))
1831 fprintf (vect_dump, "can't determine vectorization factor.");
1832 destroy_loop_vec_info (loop_vinfo);
1836 /* Analyze data dependences between the data-refs in the loop.
1837 FORNOW: fail at the first data dependence that we encounter. */
1839 ok = vect_analyze_data_ref_dependences (loop_vinfo);
1842 if (vect_print_dump_info (REPORT_DETAILS))
1843 fprintf (vect_dump, "bad data dependence.");
1844 destroy_loop_vec_info (loop_vinfo);
1848 /* Analyze the access patterns of the data-refs in the loop (consecutive,
1849 complex, etc.). FORNOW: Only handle consecutive access pattern. */
1851 ok = vect_analyze_data_ref_accesses (loop_vinfo);
1854 if (vect_print_dump_info (REPORT_DETAILS))
1855 fprintf (vect_dump, "bad data access.");
1856 destroy_loop_vec_info (loop_vinfo);
1860 /* Analyze the alignment of the data-refs in the loop.
1861 FORNOW: Only aligned accesses are handled. */
1863 ok = vect_analyze_data_refs_alignment (loop_vinfo);
1866 if (vect_print_dump_info (REPORT_DETAILS))
1867 fprintf (vect_dump, "bad data alignment.");
1868 destroy_loop_vec_info (loop_vinfo);
1872 /* Scan all the operations in the loop and make sure they are
1875 ok = vect_analyze_operations (loop_vinfo);
1878 if (vect_print_dump_info (REPORT_DETAILS))
1879 fprintf (vect_dump, "bad operation or unsupported loop bound.");
1880 destroy_loop_vec_info (loop_vinfo);
1884 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;