1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info, bool);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
103 if (vect_print_dump_info (REPORT_DETAILS))
104 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
/* Walk every statement in every basic block of the loop body.  */
106 for (i = 0; i < nbbs; i++)
108 basic_block bb = bbs[i];
110 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
112 tree stmt = bsi_stmt (si);
114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining statement: ");
120 print_generic_expr (vect_dump, stmt, TDF_SLIM);
123 gcc_assert (stmt_info);
124 /* skip stmts which do not need to be vectorized. */
125 if (!STMT_VINFO_RELEVANT_P (stmt_info)
126 && !STMT_VINFO_LIVE_P (stmt_info))
128 if (vect_print_dump_info (REPORT_DETAILS))
129 fprintf (vect_dump, "skip.");
/* Statements that already have vector type are not supported here.  */
133 if (!GIMPLE_STMT_P (stmt)
134 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
136 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
138 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
139 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Reuse a previously computed vectype when present; otherwise derive
   the scalar type from the data reference, the LHS of an assignment,
   or the statement itself, and ask the target for a vector type.  */
144 if (STMT_VINFO_VECTYPE (stmt_info))
146 vectype = STMT_VINFO_VECTYPE (stmt_info);
147 scalar_type = TREE_TYPE (vectype);
151 if (STMT_VINFO_DATA_REF (stmt_info))
153 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
154 else if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
155 scalar_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
157 scalar_type = TREE_TYPE (stmt);
159 if (vect_print_dump_info (REPORT_DETAILS))
161 fprintf (vect_dump, "get vectype for scalar type: ");
162 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
165 vectype = get_vectype_for_scalar_type (scalar_type);
168 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
171 "not vectorized: unsupported data-type ");
172 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
176 STMT_VINFO_VECTYPE (stmt_info) = vectype;
179 if (vect_print_dump_info (REPORT_DETAILS))
181 fprintf (vect_dump, "vectype: ");
182 print_generic_expr (vect_dump, vectype, TDF_SLIM);
185 nunits = TYPE_VECTOR_SUBPARTS (vectype);
186 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "nunits = %d", nunits);
/* The loop VF is the maximum number of vector subparts seen over
   all the statements in the loop.  */
189 if (!vectorization_factor
190 || (nunits > vectorization_factor))
191 vectorization_factor = nunits;
196 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* A VF of 1 (or 0) means no statement could be widened -- fail.  */
198 if (vectorization_factor <= 1)
200 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
201 fprintf (vect_dump, "not vectorized: unsupported data-type");
204 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
210 /* Function vect_analyze_operations.
212 Scan the loop stmts and make sure they are all vectorizable. */
215 vect_analyze_operations (loop_vec_info loop_vinfo)
217 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
218 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
219 int nbbs = loop->num_nodes;
220 block_stmt_iterator si;
221 unsigned int vectorization_factor = 0;
225 stmt_vec_info stmt_info;
226 bool need_to_vectorize = false;
228 if (vect_print_dump_info (REPORT_DETAILS))
229 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* The VF must already have been computed by
   vect_determine_vectorization_factor.  */
231 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
232 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
234 for (i = 0; i < nbbs; i++)
236 basic_block bb = bbs[i];
/* First check the PHI nodes of this block.  */
238 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
240 stmt_info = vinfo_for_stmt (phi);
241 if (vect_print_dump_info (REPORT_DETAILS))
243 fprintf (vect_dump, "examining phi: ");
244 print_generic_expr (vect_dump, phi, TDF_SLIM);
247 gcc_assert (stmt_info);
249 if (STMT_VINFO_LIVE_P (stmt_info))
251 /* FORNOW: not yet supported. */
252 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
253 fprintf (vect_dump, "not vectorized: value used after loop.")
257 if (STMT_VINFO_RELEVANT_P (stmt_info))
259 /* Most likely a reduction-like computation that is used
261 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
262 fprintf (vect_dump, "not vectorized: unsupported pattern.");
/* Then check every non-PHI statement of this block.  */
267 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
269 tree stmt = bsi_stmt (si);
270 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
272 if (vect_print_dump_info (REPORT_DETAILS))
274 fprintf (vect_dump, "==> examining statement: ");
275 print_generic_expr (vect_dump, stmt, TDF_SLIM);
278 gcc_assert (stmt_info);
280 /* skip stmts which do not need to be vectorized.
281 this is expected to include:
282 - the COND_EXPR which is the loop exit condition
283 - any LABEL_EXPRs in the loop
284 - computations that are used only for array indexing or loop
287 if (!STMT_VINFO_RELEVANT_P (stmt_info)
288 && !STMT_VINFO_LIVE_P (stmt_info))
290 if (vect_print_dump_info (REPORT_DETAILS))
291 fprintf (vect_dump, "irrelevant.");
295 if (STMT_VINFO_RELEVANT_P (stmt_info))
297 gcc_assert (GIMPLE_STMT_P (stmt)
298 || !VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
299 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
/* A relevant stmt is vectorizable if any of the stmt-kind
   handlers accepts it (analysis-only call: NULL gsi/vec).  */
301 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
302 || vectorizable_type_demotion (stmt, NULL, NULL)
303 || vectorizable_operation (stmt, NULL, NULL)
304 || vectorizable_assignment (stmt, NULL, NULL)
305 || vectorizable_load (stmt, NULL, NULL)
306 || vectorizable_call (stmt, NULL, NULL)
307 || vectorizable_store (stmt, NULL, NULL)
308 || vectorizable_condition (stmt, NULL, NULL));
312 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
315 "not vectorized: relevant stmt not supported: ");
316 print_generic_expr (vect_dump, stmt, TDF_SLIM);
320 need_to_vectorize = true;
/* Stmts whose value is used after the loop must be handled as a
   reduction or as a supported live operation.  */
323 if (STMT_VINFO_LIVE_P (stmt_info))
325 ok = vectorizable_reduction (stmt, NULL, NULL);
328 need_to_vectorize = true;
330 ok = vectorizable_live_operation (stmt, NULL, NULL);
334 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
337 "not vectorized: live stmt not supported: ");
338 print_generic_expr (vect_dump, stmt, TDF_SLIM);
346 /* TODO: Analyze cost. Decide if worth while to vectorize. */
348 /* All operations in the loop are either irrelevant (deal with loop
349 control, or dead), or only used outside the loop and can be moved
350 out of the loop (e.g. invariants, inductions). The loop can be
351 optimized away by scalar optimizations. We're better off not
352 touching this loop. */
353 if (!need_to_vectorize)
355 if (vect_print_dump_info (REPORT_DETAILS))
357 "All the computation can be taken out of the loop.");
358 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
360 "not vectorized: redundant loop. no profit to vectorize.");
364 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
365 && vect_print_dump_info (REPORT_DETAILS))
367 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
368 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Reject loops whose known trip count is below VF, or below the
   PARAM_MIN_VECT_LOOP_BOUND * VF profitability threshold.  */
370 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
371 && ((LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
372 || (LOOP_VINFO_INT_NITERS (loop_vinfo) <=
373 ((unsigned) (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
374 * vectorization_factor))))
376 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
377 fprintf (vect_dump, "not vectorized: iteration count too small.");
/* An epilog loop is needed when the trip count is unknown, is not a
   multiple of VF, or when peeling for alignment was requested.  */
381 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
382 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
383 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
385 if (vect_print_dump_info (REPORT_DETAILS))
386 fprintf (vect_dump, "epilog loop required.");
387 if (!vect_can_advance_ivs_p (loop_vinfo))
389 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
391 "not vectorized: can't create epilog loop 1.");
394 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
396 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
398 "not vectorized: can't create epilog loop 2.");
407 /* Function exist_non_indexing_operands_for_use_p
409 USE is one of the uses attached to STMT. Check if USE is
410 used in STMT for anything other than indexing an array. */
413 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
416 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
418 /* USE corresponds to some operand in STMT. If there is no data
419 reference in STMT, then any operand that corresponds to USE
420 is not indexing an array. */
421 if (!STMT_VINFO_DATA_REF (stmt_info))
424 /* STMT has a data_ref. FORNOW this means that its of one of
428 (This should have been verified in analyze_data_refs).
430 'var' in the second case corresponds to a def, not a use,
431 so USE cannot correspond to any operands that are not used
434 Therefore, all we need to check is if STMT falls into the
435 first case, and whether var corresponds to USE. */
/* If the LHS is an SSA name this is the 'var = array_ref' form;
   the RHS is then the only candidate non-indexing operand.  */
437 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) == SSA_NAME)
440 operand = GIMPLE_STMT_OPERAND (stmt, 1);
442 if (TREE_CODE (operand) != SSA_NAME)
452 /* Function vect_analyze_scalar_cycles.
454 Examine the cross iteration def-use cycles of scalar variables, by
455 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
456 following: invariant, induction, reduction, unknown.
458 Some forms of scalar cycles are not yet supported.
460 Example1: reduction: (unsupported yet)
466 Example2: induction: (unsupported yet)
472 Note: the following loop *is* vectorizable:
478 even though it has a def-use cycle caused by the induction variable i:
480 loop: i_2 = PHI (i_0, i_1)
485 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
486 it does not need to be vectorized because it is only used for array
487 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
488 loop2 on the other hand is relevant (it is being written to memory).
492 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
495 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
496 basic_block bb = loop->header;
499 if (vect_print_dump_info (REPORT_DETAILS))
500 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
/* Classify each PHI in the loop header.  */
502 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
504 tree access_fn = NULL;
505 tree def = PHI_RESULT (phi);
506 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
509 if (vect_print_dump_info (REPORT_DETAILS))
511 fprintf (vect_dump, "Analyze phi: ");
512 print_generic_expr (vect_dump, phi, TDF_SLIM);
515 /* Skip virtual phi's. The data dependences that are associated with
516 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
518 if (!is_gimple_reg (SSA_NAME_VAR (def)))
520 if (vect_print_dump_info (REPORT_DETAILS))
521 fprintf (vect_dump, "virtual phi. skip.");
/* Start pessimistic; refined to induction/reduction below.  */
525 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
527 /* Analyze the evolution function. */
529 access_fn = analyze_scalar_evolution (loop, def);
534 if (vect_print_dump_info (REPORT_DETAILS))
536 fprintf (vect_dump, "Access function of PHI: ");
537 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
540 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
542 if (vect_print_dump_info (REPORT_DETAILS))
543 fprintf (vect_dump, "Detected induction.");
544 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
548 /* TODO: handle invariant phis */
/* Not an induction: check for a simple reduction; mark both the PHI
   and the reduction stmt when one is found.  */
550 reduc_stmt = vect_is_simple_reduction (loop, phi);
553 if (vect_print_dump_info (REPORT_DETAILS))
554 fprintf (vect_dump, "Detected reduction.");
555 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
556 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
560 if (vect_print_dump_info (REPORT_DETAILS))
561 fprintf (vect_dump, "Unknown def-use cycle pattern.");
569 /* Function vect_insert_into_interleaving_chain.
571 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
574 vect_insert_into_interleaving_chain (struct data_reference *dra,
575 struct data_reference *drb)
577 tree prev, next, next_init;
578 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
579 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
/* Walk DRB's chain, keeping it sorted by DR_INIT, and splice DRA in
   before the first member whose init is greater than DRA's.  */
581 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
582 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
585 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
586 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
589 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
590 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
594 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
597 /* We got to the end of the list. Insert here. */
598 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
599 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
603 /* Function vect_update_interleaving_chain.
605 For two data-refs DRA and DRB that are a part of a chain interleaved data
606 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
608 There are four possible cases:
609 1. New stmts - both DRA and DRB are not a part of any chain:
612 2. DRB is a part of a chain and DRA is not:
613 no need to update FIRST_DR
614 no need to insert DRB
615 insert DRA according to init
616 3. DRA is a part of a chain and DRB is not:
617 if (init of FIRST_DR > init of DRB)
619 NEXT(FIRST_DR) = previous FIRST_DR
621 insert DRB according to its init
622 4. both DRA and DRB are in some interleaving chains:
623 choose the chain with the smallest init of FIRST_DR
624 insert the nodes of the second chain into the first one. */
627 vect_update_interleaving_chain (struct data_reference *drb,
628 struct data_reference *dra)
630 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
631 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
632 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
633 tree node, prev, next, node_init, first_stmt;
635 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
636 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
638 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
639 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
640 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
644 /* 2. DRB is a part of a chain and DRA is not. */
645 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
647 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
648 /* Insert DRA into the chain of DRB. */
649 vect_insert_into_interleaving_chain (dra, drb);
653 /* 3. DRA is a part of a chain and DRB is not. */
654 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
656 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
657 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
661 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
663 /* DRB's init is smaller than the init of the stmt previously marked
664 as the first stmt of the interleaving chain of DRA. Therefore, we
665 update FIRST_STMT and put DRB in the head of the list. */
666 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
667 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
669 /* Update all the stmts in the list to point to the new FIRST_STMT. */
670 tmp = old_first_stmt;
673 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
674 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
679 /* Insert DRB in the list of DRA. */
680 vect_insert_into_interleaving_chain (drb, dra);
681 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
686 /* 4. both DRA and DRB are in some interleaving chains. */
687 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
688 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
/* If both refs already belong to the same chain there is nothing
   to merge.  */
689 if (first_a == first_b)
691 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
692 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
/* Merge into whichever chain starts at the smaller init; the other
   chain's nodes are inserted one by one, sorted by init.  */
694 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
696 /* Insert the nodes of DRA chain into the DRB chain.
697 After inserting a node, continue from this node of the DRB chain (don't
698 start from the beginning). */
699 node = DR_GROUP_FIRST_DR (stmtinfo_a);
700 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
701 first_stmt = first_b;
705 /* Insert the nodes of DRB chain into the DRA chain.
706 After inserting a node, continue from this node of the DRA chain (don't
707 start from the beginning). */
708 node = DR_GROUP_FIRST_DR (stmtinfo_b);
709 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
710 first_stmt = first_a;
715 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
716 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
719 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
720 if (tree_int_cst_compare (next_init, node_init) > 0)
723 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
724 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
729 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
733 /* We got to the end of the list. Insert here. */
734 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
735 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
/* Re-point the inserted node at the surviving chain head.  */
738 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
739 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
744 /* Function vect_equal_offsets.
746 Check if OFFSET1 and OFFSET2 are identical expressions. */
749 vect_equal_offsets (tree offset1, tree offset2)
/* Strip conversions first so e.g. (int)x and x compare equal.  */
753 STRIP_NOPS (offset1);
754 STRIP_NOPS (offset2);
/* Pointer-identical trees are trivially equal.  */
756 if (offset1 == offset2)
/* Otherwise only identical binary expressions are compared, by
   recursing structurally on both operands.  */
759 if (TREE_CODE (offset1) != TREE_CODE (offset2)
760 || !BINARY_CLASS_P (offset1)
761 || !BINARY_CLASS_P (offset2))
764 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
765 TREE_OPERAND (offset2, 0));
766 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
767 TREE_OPERAND (offset2, 1));
769 return (res0 && res1);
773 /* Function vect_check_interleaving.
775 Check if DRA and DRB are a part of interleaving. In case they are, insert
776 DRA and DRB in an interleaving chain. */
779 vect_check_interleaving (struct data_reference *dra,
780 struct data_reference *drb)
782 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
784 /* Check that the data-refs have same first location (except init) and they
785 are both either store or load (not load and store). */
786 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
787 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
788 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
789 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
790 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
791 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
792 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
793 || DR_IS_READ (dra) != DR_IS_READ (drb))
797 1. data-refs are of the same type
798 2. their steps are equal
799 3. the step is greater than the difference between data-refs' inits */
800 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
801 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
803 if (type_size_a != type_size_b
804 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
807 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
808 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
809 step = TREE_INT_CST_LOW (DR_STEP (dra));
813 /* If init_a == init_b + the size of the type * k, we have an interleaving,
814 and DRB is accessed before DRA. */
815 diff_mod_size = (init_a - init_b) % type_size_a;
/* The inits must lie within one step of each other to interleave.  */
817 if ((init_a - init_b) > step)
820 if (diff_mod_size == 0)
822 vect_update_interleaving_chain (drb, dra);
823 if (vect_print_dump_info (REPORT_DR_DETAILS))
825 fprintf (vect_dump, "Detected interleaving ");
826 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
827 fprintf (vect_dump, " and ");
828 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
835 /* If init_b == init_a + the size of the type * k, we have an
836 interleaving, and DRA is accessed before DRB. */
837 diff_mod_size = (init_b - init_a) % type_size_a;
839 if ((init_b - init_a) > step)
842 if (diff_mod_size == 0)
844 vect_update_interleaving_chain (dra, drb);
845 if (vect_print_dump_info (REPORT_DR_DETAILS))
847 fprintf (vect_dump, "Detected interleaving ");
848 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
849 fprintf (vect_dump, " and ");
850 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
858 /* Function vect_analyze_data_ref_dependence.
860 Return TRUE if there (might) exist a dependence between a memory-reference
861 DRA and a memory-reference DRB. */
864 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
865 loop_vec_info loop_vinfo,
866 bool check_interleaving)
869 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
870 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
871 struct data_reference *dra = DDR_A (ddr);
872 struct data_reference *drb = DDR_B (ddr);
873 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
874 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
875 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
876 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
877 lambda_vector dist_v;
878 unsigned int loop_depth;
880 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
882 /* Independent data accesses. */
883 if (check_interleaving)
884 vect_check_interleaving (dra, drb);
/* Read-after-read and self dependences never constrain anything.  */
888 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
/* An undecidable dependence forces us to give up on the loop.  */
891 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
893 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
896 "not vectorized: can't determine dependence between ");
897 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
898 fprintf (vect_dump, " and ");
899 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
904 if (DDR_NUM_DIST_VECTS (ddr) == 0)
906 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
908 fprintf (vect_dump, "not vectorized: bad dist vector for ");
909 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
910 fprintf (vect_dump, " and ");
911 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Examine the dependence distance in this loop's dimension for every
   distance vector of the relation.  */
916 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
917 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
919 int dist = dist_v[loop_depth];
921 if (vect_print_dump_info (REPORT_DR_DETAILS))
922 fprintf (vect_dump, "dependence distance = %d.", dist);
924 /* Same loop iteration. */
925 if (dist % vectorization_factor == 0 && dra_size == drb_size)
927 /* Two references with distance zero have the same alignment. */
928 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
929 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
930 if (vect_print_dump_info (REPORT_ALIGNMENT))
931 fprintf (vect_dump, "accesses have the same alignment.");
932 if (vect_print_dump_info (REPORT_DR_DETAILS))
934 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
935 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
936 fprintf (vect_dump, " and ");
937 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
942 if (abs (dist) >= vectorization_factor)
944 /* Dependence distance does not create dependence, as far as vectorization
945 is concerned, in this case. */
946 if (vect_print_dump_info (REPORT_DR_DETAILS))
947 fprintf (vect_dump, "dependence distance >= VF.");
/* 0 < |dist| < VF: the dependence would be violated by vectorizing.  */
951 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
954 "not vectorized: possible dependence between data-refs ");
955 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
956 fprintf (vect_dump, " and ");
957 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
967 /* Function vect_check_dependences.
969 Return TRUE if there is a store-store or load-store dependence between
970 data-refs in DDR, otherwise return FALSE. */
973 vect_check_dependences (struct data_dependence_relation *ddr)
975 struct data_reference *dra = DDR_A (ddr);
976 struct data_reference *drb = DDR_B (ddr);
978 if (DDR_ARE_DEPENDENT (ddr) == chrec_known || dra == drb)
979 /* Independent or same data accesses. */
/* Two reads can never form a store-store/load-store dependence.  */
982 if (DR_IS_READ (dra) == DR_IS_READ (drb) && DR_IS_READ (dra))
986 if (vect_print_dump_info (REPORT_DR_DETAILS))
988 fprintf (vect_dump, "possible store or store/load dependence between ");
989 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
990 fprintf (vect_dump, " and ");
991 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
997 /* Function vect_analyze_data_ref_dependences.
999 Examine all the data references in the loop, and make sure there do not
1000 exist any data dependences between them. */
1003 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
1006 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1007 struct data_dependence_relation *ddr;
1008 bool check_interleaving = true;
1010 if (vect_print_dump_info (REPORT_DETAILS))
1011 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1013 /* We allow interleaving only if there are no store-store and load-store
1014 dependencies in the loop. */
1015 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1017 if (vect_check_dependences (ddr))
1019 check_interleaving = false;
/* Second pass: analyze each dependence relation, optionally also
   building interleaving chains.  */
1024 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1025 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, check_interleaving))
1032 /* Function vect_compute_data_ref_alignment
1034 Compute the misalignment of the data reference DR.
1037 1. If during the misalignment computation it is found that the data reference
1038 cannot be vectorized then false is returned.
1039 2. DR_MISALIGNMENT (DR) is defined.
1041 FOR NOW: No analysis is actually performed. Misalignment is calculated
1042 only for trivial cases. TODO. */
1045 vect_compute_data_ref_alignment (struct data_reference *dr)
1047 tree stmt = DR_STMT (dr);
1048 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1049 tree ref = DR_REF (dr);
1051 tree base, base_addr;
1054 tree aligned_to, alignment;
1056 if (vect_print_dump_info (REPORT_DETAILS))
1057 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1059 /* Initialize misalignment to unknown. */
1060 DR_MISALIGNMENT (dr) = -1;
1062 misalign = DR_OFFSET_MISALIGNMENT (dr);
1063 aligned_to = DR_ALIGNED_TO (dr);
1064 base_addr = DR_BASE_ADDRESS (dr);
1065 base = build_fold_indirect_ref (base_addr);
1066 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment, in bytes, of the vector type.  */
1067 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
/* If the data-ref's guaranteed alignment is below what the vectype
   needs, leave the misalignment unknown (-1 set above).  */
1069 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1072 if (vect_print_dump_info (REPORT_DETAILS))
1074 fprintf (vect_dump, "Unknown alignment for access: ");
1075 print_generic_expr (vect_dump, base, TDF_SLIM);
/* Decide whether the base object/pointer is sufficiently aligned.  */
1081 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1083 || (TREE_CODE (base_addr) == SSA_NAME
1084 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1085 TREE_TYPE (base_addr)))),
1087 base_aligned = true;
1089 base_aligned = false;
1093 /* Do not change the alignment of global variables if
1094 flag_section_anchors is enabled. */
1095 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1096 || (TREE_STATIC (base) && flag_section_anchors))
1098 if (vect_print_dump_info (REPORT_DETAILS))
1100 fprintf (vect_dump, "can't force alignment of ref: ");
1101 print_generic_expr (vect_dump, ref, TDF_SLIM);
1106 /* Force the alignment of the decl.
1107 NOTE: This is the only change to the code we make during
1108 the analysis phase, before deciding to vectorize the loop. */
1109 if (vect_print_dump_info (REPORT_DETAILS))
1110 fprintf (vect_dump, "force alignment");
1111 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1112 DECL_USER_ALIGN (base) = 1;
1115 /* At this point we assume that the base is aligned. */
1116 gcc_assert (base_aligned
1117 || (TREE_CODE (base) == VAR_DECL
1118 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1120 /* Modulo alignment. */
1121 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1123 if (!host_integerp (misalign, 1))
1125 /* Negative or overflowed misalignment value. */
1126 if (vect_print_dump_info (REPORT_DETAILS))
1127 fprintf (vect_dump, "unexpected misalign value");
/* Record the computed byte misalignment.  */
1131 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1133 if (vect_print_dump_info (REPORT_DETAILS))
1135 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1136 print_generic_expr (vect_dump, ref, TDF_SLIM);
1143 /* Function vect_compute_data_refs_alignment
1145 Compute the misalignment of data references in the loop.
1146 Return FALSE if a data reference is found that cannot be vectorized. */
1149 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1151 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1152 struct data_reference *dr;
/* Fail as soon as any data-ref's alignment cannot be computed.  */
1155 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1156 if (!vect_compute_data_ref_alignment (dr))
1163 /* Function vect_update_misalignment_for_peel
1165 DR - the data reference whose misalignment is to be adjusted.
1166 DR_PEEL - the data reference whose misalignment is being made
1167 zero in the vector loop by the peel.
1168 NPEEL - the number of iterations in the peel loop if the misalignment
1169 of DR_PEEL is known at compile time. */
1172 vect_update_misalignment_for_peel (struct data_reference *dr,
1173 struct data_reference *dr_peel, int npeel)
1176 VEC(dr_p,heap) *same_align_drs;
1177 struct data_reference *current_dr;
1178 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1179 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1180 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1181 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1183 /* For interleaved data accesses the step in the loop must be multiplied by
1184 the size of the interleaving group. */
1185 if (DR_GROUP_FIRST_DR (stmt_info))
1186 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1187 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1188 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
/* If DR and DR_PEEL have the same known element-relative misalignment,
   peeling DR_PEEL to alignment also aligns DR.  */
1190 if (known_alignment_for_access_p (dr)
1191 && known_alignment_for_access_p (dr_peel)
1192 && (DR_MISALIGNMENT (dr) / dr_size ==
1193 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1195 DR_MISALIGNMENT (dr) = 0;
1199 /* It can be assumed that the data refs with the same alignment as dr_peel
1200 are aligned in the vector loop. */
1202 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1203 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1205 if (current_dr != dr)
1207 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1208 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1209 DR_MISALIGNMENT (dr) = 0;
/* Otherwise, when both misalignments are known, shift DR's
   misalignment by the peeled bytes, modulo the SIMD width.  */
1213 if (known_alignment_for_access_p (dr)
1214 && known_alignment_for_access_p (dr_peel))
1216 DR_MISALIGNMENT (dr) += npeel * dr_size;
1217 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
/* Fallback: misalignment becomes unknown after the peel.  */
1221 if (vect_print_dump_info (REPORT_DETAILS))
1222 fprintf (vect_dump, "Setting misalignment to -1.");
1223 DR_MISALIGNMENT (dr) = -1;
1227 /* Function vect_verify_datarefs_alignment
1229    Return TRUE if all data references in the loop can be
1230    handled with respect to alignment. */
1233 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1235 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1236 struct data_reference *dr;
1237 enum dr_alignment_support supportable_dr_alignment;
/* Check each data reference: it must either be aligned or have a
   misalignment the target can handle (vect_supportable_dr_alignment).  */
1240 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1242 tree stmt = DR_STMT (dr);
1243 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1245 /* For interleaving, only the alignment of the first access matters. */
1246 if (DR_GROUP_FIRST_DR (stmt_info)
1247 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1250 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1251 if (!supportable_dr_alignment)
1253 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1255 if (DR_IS_READ (dr))
1257 "not vectorized: unsupported unaligned load.");
1260 "not vectorized: unsupported unaligned store.");
/* The access is supported but not naturally aligned; report it.  */
1264 if (supportable_dr_alignment != dr_aligned
1265 && vect_print_dump_info (REPORT_ALIGNMENT))
1266 fprintf (vect_dump, "Vectorizing an unaligned access.");
1272 /* Function vect_enhance_data_refs_alignment
1274 This pass will use loop versioning and loop peeling in order to enhance
1275 the alignment of data references in the loop.
1277 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1278 original loop is to be vectorized; Any other loops that are created by
1279 the transformations performed in this pass - are not supposed to be
1280 vectorized. This restriction will be relaxed.
1282 This pass will require a cost model to guide it whether to apply peeling
1283 or versioning or a combination of the two. For example, the scheme that
1284 intel uses when given a loop with several memory accesses, is as follows:
1285 choose one memory access ('p') which alignment you want to force by doing
1286 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1287 other accesses are not necessarily aligned, or (2) use loop versioning to
1288 generate one loop in which all accesses are aligned, and another loop in
1289 which only 'p' is necessarily aligned.
1291 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1292 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1293 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1295 Devising a cost model is the most critical aspect of this work. It will
1296 guide us on which access to peel for, whether to use loop versioning, how
1297 many versions to create, etc. The cost model will probably consist of
1298 generic considerations as well as target specific considerations (on
1299 powerpc for example, misaligned stores are more painful than misaligned
1302 Here are the general steps involved in alignment enhancements:
1304 -- original loop, before alignment analysis:
1305 for (i=0; i<N; i++){
1306 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1307 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1310 -- After vect_compute_data_refs_alignment:
1311 for (i=0; i<N; i++){
1312 x = q[i]; # DR_MISALIGNMENT(q) = 3
1313 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1316 -- Possibility 1: we do loop versioning:
1318 for (i=0; i<N; i++){ # loop 1A
1319 x = q[i]; # DR_MISALIGNMENT(q) = 3
1320 p[i] = y; # DR_MISALIGNMENT(p) = 0
1324 for (i=0; i<N; i++){ # loop 1B
1325 x = q[i]; # DR_MISALIGNMENT(q) = 3
1326 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1330 -- Possibility 2: we do loop peeling:
1331 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1335 for (i = 3; i < N; i++){ # loop 2A
1336 x = q[i]; # DR_MISALIGNMENT(q) = 0
1337 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1340 -- Possibility 3: combination of loop peeling and versioning:
1341 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1346 for (i = 3; i<N; i++){ # loop 3A
1347 x = q[i]; # DR_MISALIGNMENT(q) = 0
1348 p[i] = y; # DR_MISALIGNMENT(p) = 0
1352 for (i = 3; i<N; i++){ # loop 3B
1353 x = q[i]; # DR_MISALIGNMENT(q) = 0
1354 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1358 These loops are later passed to loop_transform to be vectorized. The
1359 vectorizer will use the alignment information to guide the transformation
1360 (whether to generate regular loads/stores, or with special handling for
1364 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1366 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1367 enum dr_alignment_support supportable_dr_alignment;
1368 struct data_reference *dr0 = NULL;
1369 struct data_reference *dr;
1371 bool do_peeling = false;
1372 bool do_versioning = false;
1375 stmt_vec_info stmt_info;
/* NOTE(review): several declarations used below (`i', `stmt', `mis',
   `npeel', `stat', `mask', `vectype') are on lines missing from this
   extract — compare with the upstream file before building.  */
1377 if (vect_print_dump_info (REPORT_DETAILS))
1378 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1380 /* While cost model enhancements are expected in the future, the high level
1381 view of the code at this time is as follows:
1383 A) If there is a misaligned write then see if peeling to align this write
1384 can make all data references satisfy vect_supportable_dr_alignment.
1385 If so, update data structures as needed and return true. Note that
1386 at this time vect_supportable_dr_alignment is known to return false
1387 for a misaligned write.
1389 B) If peeling wasn't possible and there is a data reference with an
1390 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1391 then see if loop versioning checks can be used to make all data
1392 references satisfy vect_supportable_dr_alignment. If so, update
1393 data structures as needed and return true.
1395 C) If neither peeling nor versioning were successful then return false if
1396 any data reference does not satisfy vect_supportable_dr_alignment.
1398 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1400 Note, Possibility 3 above (which is peeling and versioning together) is not
1401 being done at this time. */
1403 /* (1) Peeling to force alignment. */
1405 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1407 + How many accesses will become aligned due to the peeling
1408 - How many accesses will become unaligned due to the peeling,
1409 and the cost of misaligned accesses.
1410 - The cost of peeling (the extra runtime checks, the increase
1413 The scheme we use FORNOW: peel to force the alignment of the first
1414 misaligned store in the loop.
1415 Rationale: misaligned stores are not yet supported.
1417 TODO: Use a cost model. */
1419 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1421 stmt = DR_STMT (dr);
1422 stmt_info = vinfo_for_stmt (stmt);
1424 /* For interleaving, only the alignment of the first access
1426 if (DR_GROUP_FIRST_DR (stmt_info)
1427 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Candidate for peeling: a misaligned store (misaligned stores are the
   accesses we cannot support; see the FORNOW rationale above).  */
1430 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1432 if (DR_GROUP_FIRST_DR (stmt_info))
1434 /* For interleaved access we peel only if number of iterations in
1435 the prolog loop ({VF - misalignment}), is a multiple of the
1436 number of the interleaved accesses. */
1437 int elem_size, mis_in_elements;
1438 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1440 /* FORNOW: handle only known alignment. */
1441 if (!known_alignment_for_access_p (dr))
1447 elem_size = UNITS_PER_SIMD_WORD / vf;
1448 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1450 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1462 /* Often peeling for alignment will require peeling for loop-bound, which in
1463 turn requires that we know how to adjust the loop ivs after the loop. */
1464 if (!vect_can_advance_ivs_p (loop_vinfo))
1472 if (known_alignment_for_access_p (dr0))
1474 /* Since it's known at compile time, compute the number of iterations
1475 in the peeled loop (the peeling factor) for use in updating
1476 DR_MISALIGNMENT values. The peeling factor is the vectorization
1477 factor minus the misalignment as an element count. */
1478 mis = DR_MISALIGNMENT (dr0);
1479 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1480 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1482 /* For interleaved data access every iteration accesses all the
1483 members of the group, therefore we divide the number of iterations
1484 by the group size. */
1485 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1486 if (DR_GROUP_FIRST_DR (stmt_info))
1487 npeel /= DR_GROUP_SIZE (stmt_info);
1489 if (vect_print_dump_info (REPORT_DETAILS))
1490 fprintf (vect_dump, "Try peeling by %d", npeel);
1493 /* Ensure that all data refs can be vectorized after the peel. */
1494 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1496 int save_misalignment;
1501 stmt = DR_STMT (dr);
1502 stmt_info = vinfo_for_stmt (stmt);
1503 /* For interleaving, only the alignment of the first access
1505 if (DR_GROUP_FIRST_DR (stmt_info)
1506 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Tentatively apply the peel to DR, test supportability, then restore
   the saved misalignment — the real update happens below in (1.2).  */
1509 save_misalignment = DR_MISALIGNMENT (dr);
1510 vect_update_misalignment_for_peel (dr, dr0, npeel);
1511 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1512 DR_MISALIGNMENT (dr) = save_misalignment;
1514 if (!supportable_dr_alignment)
1523 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1524 If the misalignment of DR_i is identical to that of dr0 then set
1525 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1526 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1527 by the peeling factor times the element size of DR_i (MOD the
1528 vectorization factor times the size). Otherwise, the
1529 misalignment of DR_i must be set to unknown. */
1530 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1532 vect_update_misalignment_for_peel (dr, dr0, npeel);
1534 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1535 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1536 DR_MISALIGNMENT (dr0) = 0;
1537 if (vect_print_dump_info (REPORT_ALIGNMENT))
1538 fprintf (vect_dump, "Alignment of access forced using peeling.");
1540 if (vect_print_dump_info (REPORT_DETAILS))
1541 fprintf (vect_dump, "Peeling for alignment will be applied.");
1543 stat = vect_verify_datarefs_alignment (loop_vinfo);
1550 /* (2) Versioning to force alignment. */
1552 /* Try versioning if:
1553 1) flag_tree_vect_loop_version is TRUE
1554 2) optimize_size is FALSE
1555 3) there is at least one unsupported misaligned data ref with an unknown
1557 4) all misaligned data refs with a known misalignment are supported, and
1558 5) the number of runtime alignment checks is within reason. */
1560 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1564 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1566 stmt = DR_STMT (dr);
1567 stmt_info = vinfo_for_stmt (stmt);
1569 /* For interleaving, only the alignment of the first access
1571 if (aligned_access_p (dr)
1572 || (DR_GROUP_FIRST_DR (stmt_info)
1573 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1576 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1578 if (!supportable_dr_alignment)
/* Versioning cannot help when the misalignment is already known at
   compile time, or when the runtime-check budget is exhausted.  */
1584 if (known_alignment_for_access_p (dr)
1585 || VEC_length (tree,
1586 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1587 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1589 do_versioning = false;
1593 stmt = DR_STMT (dr);
1594 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1595 gcc_assert (vectype);
1597 /* The rightmost bits of an aligned address must be zeros.
1598 Construct the mask needed for this test. For example,
1599 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1600 mask must be 15 = 0xf. */
1601 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1603 /* FORNOW: use the same mask to test all potentially unaligned
1604 references in the loop. The vectorizer currently supports
1605 a single vector size, see the reference to
1606 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1607 vectorization factor is computed. */
1608 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1609 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1610 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1611 VEC_safe_push (tree, heap,
1612 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1617 /* Versioning requires at least one misaligned data reference. */
1618 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1619 do_versioning = false;
1620 else if (!do_versioning)
1621 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1626 VEC(tree,heap) *may_misalign_stmts
1627 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1630 /* It can now be assumed that the data references in the statements
1631 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1632 of the loop being vectorized. */
1633 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1635 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1636 dr = STMT_VINFO_DATA_REF (stmt_info);
1637 DR_MISALIGNMENT (dr) = 0;
1638 if (vect_print_dump_info (REPORT_ALIGNMENT))
1639 fprintf (vect_dump, "Alignment of access forced using versioning.");
1642 if (vect_print_dump_info (REPORT_DETAILS))
1643 fprintf (vect_dump, "Versioning for alignment will be applied.");
1645 /* Peeling and versioning can't be done together at this time. */
1646 gcc_assert (! (do_peeling && do_versioning));
1648 stat = vect_verify_datarefs_alignment (loop_vinfo);
1653 /* This point is reached if neither peeling nor versioning is being done. */
1654 gcc_assert (! (do_peeling || do_versioning));
1656 stat = vect_verify_datarefs_alignment (loop_vinfo);
1661 /* Function vect_analyze_data_refs_alignment
1663    Analyze the alignment of the data-references in the loop.
1664    Return FALSE if a data reference is found that cannot be vectorized. */
1667 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1669 if (vect_print_dump_info (REPORT_DETAILS))
1670 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
/* Compute per-data-ref misalignment; a failure here means we could not
   even determine the alignment, so the loop is rejected.  */
1672 if (!vect_compute_data_refs_alignment (loop_vinfo))
1674 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1676 "not vectorized: can't calculate alignment for data ref.");
1684 /* Function vect_analyze_data_ref_access.
1686 Analyze the access pattern of the data-reference DR. For now, a data access
1687 has to be consecutive to be considered vectorizable. */
1690 vect_analyze_data_ref_access (struct data_reference *dr)
1692 tree step = DR_STEP (dr);
1693 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1694 tree scalar_type = TREE_TYPE (DR_REF (dr));
1695 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1696 tree stmt = DR_STMT (dr);
1697 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1698 interleaving group (including gaps). */
1699 HOST_WIDE_INT stride = dr_step / type_size;
1703 if (vect_print_dump_info (REPORT_DETAILS))
1704 fprintf (vect_dump, "bad data-ref access");
/* Consecutive access: STEP equals the scalar type size
   (tree_int_cst_compare returns 0 on equality).  */
1709 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1711 /* Mark that it is not interleaving. */
1712 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1716 /* Not consecutive access is possible only if it is a part of interleaving. */
1717 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1719 /* Check if this DR is a part of interleaving, and is a single
1720 element of the group that is accessed in the loop. */
1722 /* Gaps are supported only for loads. STEP must be a multiple of the type
1723 size. The size of the group must be a power of 2. */
1725 && (dr_step % type_size) == 0
1727 && exact_log2 (stride) != -1)
/* Record this stmt as its own (single-element) interleaving group.  */
1729 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1730 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1731 if (vect_print_dump_info (REPORT_DR_DETAILS))
1733 fprintf (vect_dump, "Detected single element interleaving %d ",
1734 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1735 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1736 fprintf (vect_dump, " step ");
1737 print_generic_expr (vect_dump, step, TDF_SLIM);
1741 if (vect_print_dump_info (REPORT_DETAILS))
1742 fprintf (vect_dump, "not consecutive access");
1746 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1748 /* First stmt in the interleaving chain. Check the chain. */
1749 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1750 struct data_reference *data_ref = dr;
1751 unsigned int count = 1;
1753 tree prev_init = DR_INIT (data_ref);
1755 HOST_WIDE_INT diff, count_in_bytes;
1759 /* Skip same data-refs. In case that two or more stmts share data-ref
1760 (supported only for loads), we vectorize only the first stmt, and
1761 the rest get their vectorized loads from the first one. */
1762 if (!tree_int_cst_compare (DR_INIT (data_ref),
1763 DR_INIT (STMT_VINFO_DATA_REF (
1764 vinfo_for_stmt (next)))))
1766 /* For load use the same data-ref load. (We check in
1767 vect_check_dependences() that there are no two stores to the
1769 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1772 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1777 /* Check that all the accesses have the same STEP. */
1778 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1779 if (tree_int_cst_compare (step, next_step))
1781 if (vect_print_dump_info (REPORT_DETAILS))
1782 fprintf (vect_dump, "not consecutive access in interleaving")
;
1786 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1787 /* Check that the distance between two accesses is equal to the type
1788 size. Otherwise, we have gaps. */
1789 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1790 - TREE_INT_CST_LOW (prev_init)) / type_size;
1791 if (!DR_IS_READ (data_ref) && diff != 1)
1793 if (vect_print_dump_info (REPORT_DETAILS))
1794 fprintf (vect_dump, "interleaved store with gaps");
1797 /* Store the gap from the previous member of the group. If there is no
1798 gap in the access, DR_GROUP_GAP is always 1. */
1799 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1801 prev_init = DR_INIT (data_ref);
1802 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1803 /* Count the number of data-refs in the chain. */
1807 /* COUNT is the number of accesses found, we multiply it by the size of
1808 the type to get COUNT_IN_BYTES. */
1809 count_in_bytes = type_size * count;
1811 /* Check that the size of the interleaving is not greater than STEP. */
1812 if (dr_step < count_in_bytes)
1814 if (vect_print_dump_info (REPORT_DETAILS))
1816 fprintf (vect_dump, "interleaving size is greater than step for ");
1817 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1822 /* Check that the size of the interleaving is equal to STEP for stores,
1823 i.e., that there are no gaps. */
1824 if (!DR_IS_READ (dr) && dr_step != count_in_bytes)
1826 if (vect_print_dump_info (REPORT_DETAILS))
1827 fprintf (vect_dump, "interleaved store with gaps");
1831 /* Check that STEP is a multiple of type size. */
1832 if ((dr_step % type_size) != 0)
1834 if (vect_print_dump_info (REPORT_DETAILS))
1836 fprintf (vect_dump, "step is not a multiple of type size: step ");
1837 print_generic_expr (vect_dump, step, TDF_SLIM);
1838 fprintf (vect_dump, " size ");
1839 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1845 /* FORNOW: we handle only interleaving that is a power of 2. */
1846 if (exact_log2 (stride) == -1)
1848 if (vect_print_dump_info (REPORT_DETAILS))
1849 fprintf (vect_dump, "interleaving is not a power of 2");
1852 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1858 /* Function vect_analyze_data_ref_accesses.
1860 Analyze the access pattern of all the data references in the loop.
1862 FORNOW: the only access pattern that is considered vectorizable is a
1863 simple step 1 (consecutive) access.
1865 FORNOW: handle only arrays and pointer accesses. */
1868 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1871 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1872 struct data_reference *dr;
1874 if (vect_print_dump_info (REPORT_DETAILS))
1875 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
/* Reject the loop as soon as any data-ref has an unsupported pattern.  */
1877 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1878 if (!vect_analyze_data_ref_access (dr))
1880 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1881 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1889 /* Function vect_analyze_data_refs.
1891 Find all the data references in the loop.
1893 The general structure of the analysis of data refs in the vectorizer is as
1895 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1896 find and analyze all data-refs in the loop and their dependences.
1897 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1898 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1899 4- vect_analyze_drs_access(): check that ref_stmt.step is ok. */
1904 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1906 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1908 VEC (data_reference_p, heap) *datarefs;
1909 struct data_reference *dr;
1912 if (vect_print_dump_info (REPORT_DETAILS))
1913 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
/* Populate LOOP_VINFO_DATAREFS / LOOP_VINFO_DDRS with all data references
   and their dependence relations.  */
1915 compute_data_dependences_for_loop (loop, true,
1916 &LOOP_VINFO_DATAREFS (loop_vinfo),
1917 &LOOP_VINFO_DDRS (loop_vinfo));
1919 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1920 from stmt_vec_info struct to DR and vectype. */
1921 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1923 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1926 stmt_vec_info stmt_info;
1928 if (!dr || !DR_REF (dr))
1930 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1931 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1935 /* Update DR field in stmt_vec_info struct. */
1936 stmt = DR_STMT (dr);
1937 stmt_info = vinfo_for_stmt (stmt);
/* FORNOW-style restriction: a stmt may carry only a single data-ref.  */
1939 if (STMT_VINFO_DATA_REF (stmt_info))
1941 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1944 "not vectorized: more than one data ref in stmt: ");
1945 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1949 STMT_VINFO_DATA_REF (stmt_info) = dr;
1951 /* Check that analysis of the data-ref succeeded. */
1952 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1955 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1957 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1958 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1962 if (!DR_MEMTAG (dr))
1964 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1966 fprintf (vect_dump, "not vectorized: no memory tag for ");
1967 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1972 /* Set vectype for STMT. */
1973 scalar_type = TREE_TYPE (DR_REF (dr));
1974 STMT_VINFO_VECTYPE (stmt_info) =
1975 get_vectype_for_scalar_type (scalar_type);
1976 if (!STMT_VINFO_VECTYPE (stmt_info))
1978 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1981 "not vectorized: no vectype for stmt: ");
1982 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1983 fprintf (vect_dump, " scalar_type: ");
1984 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1994 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1996 /* Function vect_mark_relevant.
1998 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2001 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2002 enum vect_relevant relevant, bool live_p)
2004 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2005 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2006 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2008 if (vect_print_dump_info (REPORT_DETAILS))
2009 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
2011 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2015 /* This is the last stmt in a sequence that was detected as a
2016 pattern that can potentially be vectorized. Don't mark the stmt
2017 as relevant/live because it's not going to be vectorized.
2018 Instead mark the pattern-stmt that replaces it. */
2019 if (vect_print_dump_info (REPORT_DETAILS))
2020 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2021 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2022 stmt_info = vinfo_for_stmt (pattern_stmt);
2023 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2024 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2025 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2026 stmt = pattern_stmt;
/* Liveness accumulates; relevance is only ever raised (monotone in the
   vect_relevant enum ordering).  */
2029 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2030 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2031 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2033 if (TREE_CODE (stmt) == PHI_NODE)
2034 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2035 or live will fail vectorization later on. */
/* Nothing changed — no need to reprocess this stmt.  */
2038 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2039 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2041 if (vect_print_dump_info (REPORT_DETAILS))
2042 fprintf (vect_dump, "already marked relevant/live.");
2046 VEC_safe_push (tree, heap, *worklist, stmt);
2050 /* Function vect_stmt_relevant_p.
2052 Return true if STMT in loop that is represented by LOOP_VINFO is
2053 "relevant for vectorization".
2055 A stmt is considered "relevant for vectorization" if:
2056 - it has uses outside the loop.
2057 - it has vdefs (it alters memory).
2058 - control stmts in the loop (except for the exit condition).
2060 CHECKME: what other side effects would the vectorizer allow? */
2063 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2064 enum vect_relevant *relevant, bool *live_p)
2066 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2067 ssa_op_iter op_iter;
2068 imm_use_iterator imm_iter;
2069 use_operand_p use_p;
2070 def_operand_p def_p;
2072 *relevant = vect_unused_in_loop;
/* NOTE(review): the initialization of *live_p (presumably to false, on
   original line 2073) is missing from this extract — confirm against the
   upstream file; the return below reads *live_p.  */
2075 /* cond stmt other than loop exit cond. */
2076 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2077 *relevant = vect_used_in_loop;
2079 /* changing memory. */
2080 if (TREE_CODE (stmt) != PHI_NODE)
2081 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2083 if (vect_print_dump_info (REPORT_DETAILS))
2084 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2085 *relevant = vect_used_in_loop;
2088 /* uses outside the loop. */
2089 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2091 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2093 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2094 if (!flow_bb_inside_loop_p (loop, bb))
2096 if (vect_print_dump_info (REPORT_DETAILS))
2097 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2099 /* We expect all such uses to be in the loop exit phis
2100 (because of loop closed form) */
2101 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2102 gcc_assert (bb == single_exit (loop)->dest);
2109 return (*live_p || *relevant);
2113 /* Function vect_mark_stmts_to_be_vectorized.
2115 Not all stmts in the loop need to be vectorized. For example:
2124 Stmt 1 and 3 do not need to be vectorized, because loop control and
2125 addressing of vectorized data-refs are handled differently.
2127 This pass detects such stmts. */
2130 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2132 VEC(tree,heap) *worklist;
2133 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2134 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2135 unsigned int nbbs = loop->num_nodes;
2136 block_stmt_iterator si;
2141 stmt_vec_info stmt_vinfo;
2145 enum vect_relevant relevant;
2147 enum vect_def_type dt;
/* NOTE(review): several local declarations (`bb', `phi', `stmt', `i',
   `live_p', `ann', `use', `iter', `def', `def_stmt') are on lines missing
   from this extract — compare with the upstream file.  */
2149 if (vect_print_dump_info (REPORT_DETAILS))
2150 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2152 worklist = VEC_alloc (tree, heap, 64);
2154 /* 1. Init worklist. */
/* Seed the worklist with all phis and stmts that are relevant or live on
   their own account (vect_stmt_relevant_p).  */
2157 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2159 if (vect_print_dump_info (REPORT_DETAILS))
2161 fprintf (vect_dump, "init: phi relevant? ");
2162 print_generic_expr (vect_dump, phi, TDF_SLIM);
2165 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2166 vect_mark_relevant (&worklist, phi, relevant, live_p);
2169 for (i = 0; i < nbbs; i++)
2172 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2174 stmt = bsi_stmt (si);
2176 if (vect_print_dump_info (REPORT_DETAILS))
2178 fprintf (vect_dump, "init: stmt relevant? ");
2179 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2182 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2183 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2188 /* 2. Process_worklist */
2190 while (VEC_length (tree, worklist) > 0)
2192 stmt = VEC_pop (tree, worklist);
2194 if (vect_print_dump_info (REPORT_DETAILS))
2196 fprintf (vect_dump, "worklist: examine stmt: ");
2197 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2200 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2201 in the loop, mark the stmt that defines it (DEF_STMT) as
2202 relevant/irrelevant and live/dead according to the liveness and
2203 relevance properties of STMT.
2206 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2208 ann = stmt_ann (stmt);
2209 stmt_vinfo = vinfo_for_stmt (stmt);
2211 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2212 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2214 /* Generally, the liveness and relevance properties of STMT are
2215 propagated to the DEF_STMTs of its USEs:
2216 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2217 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2222 If USE is used only for address computations (e.g. array indexing),
2223 which does not need to be directly vectorized, then the
2224 liveness/relevance of the respective DEF_STMT is left unchanged.
2227 If STMT has been identified as defining a reduction variable, then
2230 The last use of STMT is the reduction-variable, which is defined
2231 by a loop-header-phi. We don't want to mark the phi as live or
2232 relevant (because it does not need to be vectorized, it is handled
2233 as part of the vectorization of the reduction), so in this case we
2234 skip the call to vect_mark_relevant.
2236 The rest of the uses of STMT are defined in the loop body. For
2237 the def_stmt of these uses we want to set liveness/relevance
2239 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2240 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2241 because even though STMT is classified as live (since it defines a
2242 value that is used across loop iterations) and irrelevant (since it
2243 is not used inside the loop), it will be vectorized, and therefore
2244 the corresponding DEF_STMTs need to marked as relevant.
2245 We distinguish between two kinds of relevant stmts - those that are
2246 used by a reduction computation, and those that are (also) used by
2247 a regular computation. This allows us later on to identify stmts
2248 that are used solely by a reduction, and therefore the order of
2249 the results that they produce does not have to be kept.
2253 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2255 gcc_assert (relevant == vect_unused_in_loop && live_p);
2256 relevant = vect_used_by_reduction;
2260 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2262 /* case 1: we are only interested in uses that need to be vectorized.
2263 Uses that are used for address computation are not considered
2266 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2269 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2271 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2272 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2273 VEC_free (tree, heap, worklist);
2277 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2280 if (vect_print_dump_info (REPORT_DETAILS))
2282 fprintf (vect_dump, "worklist: examine use %d: ", i);
2283 print_generic_expr (vect_dump, use, TDF_SLIM);
/* Defs from outside the loop need no marking.  */
2286 bb = bb_for_stmt (def_stmt);
2287 if (!flow_bb_inside_loop_p (loop, bb))
2290 /* case 2.1: the reduction-use does not mark the defining-phi
2292 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2293 && TREE_CODE (def_stmt) == PHI_NODE)
2296 vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
2298 } /* while worklist */
2300 VEC_free (tree, heap, worklist);
2305 /* Function vect_can_advance_ivs_p
2307 In case the number of iterations that LOOP iterates is unknown at compile
2308 time, an epilog loop will be generated, and the loop induction variables
2309 (IVs) will be "advanced" to the value they are supposed to take just before
2310 the epilog loop. Here we check that the access function of the loop IVs
2311 and the expression that represents the loop bound are simple enough.
2312 These restrictions will be relaxed in the future. */
/* NOTE(review): this is a line-numbered listing with gaps -- the return-type
   line, braces, `return` statements and some guard conditions are elided.
   Only comments are added below; the code text itself is untouched.  */
2315 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2317 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2318 basic_block bb = loop->header;
2321 /* Analyze phi functions of the loop header. */
2323 if (vect_print_dump_info (REPORT_DETAILS))
2324 fprintf (vect_dump, "vect_can_advance_ivs_p:");
/* Walk every PHI in the loop header: each PHI that is neither virtual nor
   a reduction must have an analyzable, non-polynomial evolution for the
   IV-advancing transformation to be possible.  */
2326 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2328 tree access_fn = NULL;
2329 tree evolution_part;
2331 if (vect_print_dump_info (REPORT_DETAILS))
2333 fprintf (vect_dump, "Analyze phi: ");
2334 print_generic_expr (vect_dump, phi, TDF_SLIM);
2337 /* Skip virtual phi's. The data dependences that are associated with
2338 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2340 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2342 if (vect_print_dump_info (REPORT_DETAILS))
2343 fprintf (vect_dump, "virtual phi. skip.");
2347 /* Skip reduction phis. */
2349 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2351 if (vect_print_dump_info (REPORT_DETAILS))
2352 fprintf (vect_dump, "reduc phi. skip.");
2356 /* Analyze the evolution function. */
/* Instantiate the scalar evolution of the PHI result in this loop.
   NOTE(review): the guard that rejects a NULL access_fn (reported below as
   "No Access function.") is elided in this listing.  */
2358 access_fn = instantiate_parameters
2359 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2363 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "No Access function.");
2368 if (vect_print_dump_info (REPORT_DETAILS))
2370 fprintf (vect_dump, "Access function of PHI: ");
2371 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* Extract the step (evolution) part of the access function in this loop;
   NULL_TREE means SCEV found no evolution, so the IV cannot be advanced.  */
2374 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2376 if (evolution_part == NULL_TREE)
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "No evolution.");
2383 /* FORNOW: We do not transform initial conditions of IVs
2384 which evolution functions are a polynomial of degree >= 2. */
/* A chrec-typed evolution part indicates a polynomial IV of degree >= 2,
   which is rejected (the failure path is elided in this listing).  */
2386 if (tree_is_chrec (evolution_part))
2394 /* Function vect_get_loop_niters.
2396 Determine how many iterations the loop is executed.
2397 If an expression that represents the number of iterations
2398 can be constructed, place it in NUMBER_OF_ITERATIONS.
2399 Return the loop exit condition. */
/* Per the forward declaration at the top of this file, the return type is
   `static tree` -- the value of get_loop_exit_condition (LOOP).
   NUMBER_OF_ITERATIONS is written only when the iteration count is known
   (non-NULL and not chrec_dont_know); otherwise it is left untouched.
   NOTE(review): the `tree niters;` declaration and the surrounding braces
   are elided in this numbered listing.  */
2402 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2406 if (vect_print_dump_info (REPORT_DETAILS))
2407 fprintf (vect_dump, "=== get_loop_niters ===");
/* Ask the scalar-evolution machinery how many times the exit condition
   of LOOP is executed.  */
2409 niters = number_of_exit_cond_executions (loop);
2411 if (niters != NULL_TREE
2412 && niters != chrec_dont_know)
2414 *number_of_iterations = niters;
2416 if (vect_print_dump_info (REPORT_DETAILS))
2418 fprintf (vect_dump, "==> get_loop_niters:" );
2419 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2423 return get_loop_exit_condition (loop);
2427 /* Function vect_analyze_loop_form.
2429 Verify the following restrictions (some may be relaxed in the future):
2430 - it's an inner-most loop
2431 - number of BBs = 2 (which are the loop header and the latch)
2432 - the loop has a pre-header
2433 - the loop has a single entry and exit
2434 - the loop exit condition is simple enough, and the number of iterations
2435 can be analyzed (a countable loop). */
/* Returns a freshly allocated loop_vec_info on success.  NOTE(review):
   this numbered listing has gaps -- the `tree loop_cond;` declaration, the
   nested-loop and failed-exit-condition guards, most `return NULL;` failure
   paths, and closing braces are elided.  Only comments are added below.  */
2437 static loop_vec_info
2438 vect_analyze_loop_form (struct loop *loop)
2440 loop_vec_info loop_vinfo;
2442 tree number_of_iterations = NULL;
2444 if (vect_print_dump_info (REPORT_DETAILS))
2445 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
/* Check 1: inner-most loop only (the guard itself is elided here;
   the report below corresponds to a nested loop being rejected).  */
2449 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2450 fprintf (vect_dump, "not vectorized: nested loop.");
/* Check 2: exactly one exit, exactly two basic blocks (header + latch),
   and exactly two incoming edges to the header (preheader + latch).  */
2454 if (!single_exit (loop)
2455 || loop->num_nodes != 2
2456 || EDGE_COUNT (loop->header->preds) != 2)
2458 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2460 if (!single_exit (loop))
2461 fprintf (vect_dump, "not vectorized: multiple exits.");
2462 else if (loop->num_nodes != 2)
2463 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2464 else if (EDGE_COUNT (loop->header->preds) != 2)
2465 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2471 /* We assume that the loop exit condition is at the end of the loop. i.e,
2472 that the loop is represented as a do-while (with a proper if-guard
2473 before the loop if needed), where the loop header contains all the
2474 executable statements, and the latch is empty. */
2475 if (!empty_block_p (loop->latch)
2476 || phi_nodes (loop->latch))
2478 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2479 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2483 /* Make sure there exists a single-predecessor exit bb: */
/* If the exit destination has multiple predecessors, split the exit edge
   (unless it is abnormal, in which case the loop is rejected below).  */
2484 if (!single_pred_p (single_exit (loop)->dest))
2486 edge e = single_exit (loop);
2487 if (!(e->flags & EDGE_ABNORMAL))
2489 split_loop_exit_edge (e);
2490 if (vect_print_dump_info (REPORT_DETAILS))
2491 fprintf (vect_dump, "split exit edge.");
2495 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2496 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
/* Check: a loop whose header holds no executable statements is useless.  */
2501 if (empty_block_p (loop->header))
2503 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2504 fprintf (vect_dump, "not vectorized: empty loop.");
/* Check: the loop must be countable.  vect_get_loop_niters returns the
   exit condition and fills number_of_iterations when it is computable.
   NOTE(review): the guard testing loop_cond ("complicated exit condition")
   is elided in this listing.  */
2508 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2511 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2512 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2516 if (!number_of_iterations)
2518 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2520 "not vectorized: number of iterations cannot be computed.");
2524 if (chrec_contains_undetermined (number_of_iterations))
2526 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2527 fprintf (vect_dump, "Infinite number of iterations.");
/* All form checks passed: build the loop_vec_info and record results.  */
2531 loop_vinfo = new_loop_vec_info (loop);
2532 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2534 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2536 if (vect_print_dump_info (REPORT_DETAILS))
2538 fprintf (vect_dump, "Symbolic number of iterations is ");
2539 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
/* A known-zero iteration count means there is nothing to vectorize.  */
2543 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2545 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2546 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2550 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2556 /* Function vect_analyze_loop.
2558 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2559 for it. The different analyses will record information in the
2560 loop_vec_info struct. */
2562 vect_analyze_loop (struct loop *loop)
2565 loop_vec_info loop_vinfo;
2567 if (vect_print_dump_info (REPORT_DETAILS))
2568 fprintf (vect_dump, "===== analyze_loop_nest =====");
2570 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2572 loop_vinfo = vect_analyze_loop_form (loop);
2575 if (vect_print_dump_info (REPORT_DETAILS))
2576 fprintf (vect_dump, "bad loop form.");
2580 /* Find all data references in the loop (which correspond to vdefs/vuses)
2581 and analyze their evolution in the loop.
2583 FORNOW: Handle only simple, array references, which
2584 alignment can be forced, and aligned pointer-references. */
2586 ok = vect_analyze_data_refs (loop_vinfo);
2589 if (vect_print_dump_info (REPORT_DETAILS))
2590 fprintf (vect_dump, "bad data references.");
2591 destroy_loop_vec_info (loop_vinfo);
2595 /* Classify all cross-iteration scalar data-flow cycles.
2596 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2598 vect_analyze_scalar_cycles (loop_vinfo);
2600 vect_pattern_recog (loop_vinfo);
2602 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2604 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2607 if (vect_print_dump_info (REPORT_DETAILS))
2608 fprintf (vect_dump, "unexpected pattern.");
2609 destroy_loop_vec_info (loop_vinfo);
2613 /* Analyze the alignment of the data-refs in the loop.
2614 Fail if a data reference is found that cannot be vectorized. */
2616 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2619 if (vect_print_dump_info (REPORT_DETAILS))
2620 fprintf (vect_dump, "bad data alignment.");
2621 destroy_loop_vec_info (loop_vinfo);
2625 ok = vect_determine_vectorization_factor (loop_vinfo);
2628 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "can't determine vectorization factor.");
2630 destroy_loop_vec_info (loop_vinfo);
2634 /* Analyze data dependences between the data-refs in the loop.
2635 FORNOW: fail at the first data dependence that we encounter. */
2637 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2640 if (vect_print_dump_info (REPORT_DETAILS))
2641 fprintf (vect_dump, "bad data dependence.");
2642 destroy_loop_vec_info (loop_vinfo);
2646 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2647 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2649 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2652 if (vect_print_dump_info (REPORT_DETAILS))
2653 fprintf (vect_dump, "bad data access.");
2654 destroy_loop_vec_info (loop_vinfo);
2658 /* This pass will decide on using loop versioning and/or loop peeling in
2659 order to enhance the alignment of data references in the loop. */
2661 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2664 if (vect_print_dump_info (REPORT_DETAILS))
2665 fprintf (vect_dump, "bad data alignment.");
2666 destroy_loop_vec_info (loop_vinfo);
2670 /* Scan all the operations in the loop and make sure they are
2673 ok = vect_analyze_operations (loop_vinfo);
2676 if (vect_print_dump_info (REPORT_DETAILS))
2677 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2678 destroy_loop_vec_info (loop_vinfo);
2682 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;