1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
43 /* Main analysis functions. */
/* NOTE(review): file-scope prototypes for the static analysis phases below;
   each consumes the per-loop vectorization info (loop_vec_info).  */
44 static loop_vec_info vect_analyze_loop_form (struct loop *);
45 static bool vect_analyze_data_refs (loop_vec_info);
46 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
47 static void vect_analyze_scalar_cycles (loop_vec_info);
48 static bool vect_analyze_data_ref_accesses (loop_vec_info);
49 static bool vect_analyze_data_ref_dependences (loop_vec_info);
50 static bool vect_analyze_data_refs_alignment (loop_vec_info);
51 static bool vect_compute_data_refs_alignment (loop_vec_info);
52 static bool vect_enhance_data_refs_alignment (loop_vec_info);
53 static bool vect_analyze_operations (loop_vec_info);
54 static bool vect_determine_vectorization_factor (loop_vec_info);
56 /* Utility functions for the analyses. */
/* NOTE(review): helpers used by the phases above; the bool third argument of
   vect_analyze_data_ref_dependence controls interleaving detection (see its
   definition further down).  */
57 static bool exist_non_indexing_operands_for_use_p (tree, tree);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_dependence_relation *, loop_vec_info, bool);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static bool vect_can_advance_ivs_p (loop_vec_info);
64 static void vect_update_misalignment_for_peel
65 (struct data_reference *, struct data_reference *, int npeel);
67 /* Function vect_determine_vectorization_factor
69 Determine the vectorization factor (VF). VF is the number of data elements
70 that are operated upon in parallel in a single iteration of the vectorized
71 loop. For example, when vectorizing a loop that operates on 4byte elements,
72 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
73 elements can fit in a single vector register.
75 We currently support vectorization of loops in which all types operated upon
76 are of the same size. Therefore this function currently sets VF according to
77 the size of the types operated upon, and fails if there are multiple sizes
80 VF is also the factor by which the loop iterations are strip-mined, e.g.:
87 for (i=0; i<N; i+=VF){
88 a[i:VF] = b[i:VF] + c[i:VF];
93 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
/* NOTE(review): this listing is an elided excerpt -- the return type,
   braces and some statements are not visible; code kept byte-identical.  */
95 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
96 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
97 int nbbs = loop->num_nodes;
98 block_stmt_iterator si;
99 unsigned int vectorization_factor = 0;
103 if (vect_print_dump_info (REPORT_DETAILS))
104 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
/* Walk every stmt of every basic block in the loop; VF becomes the maximum
   TYPE_VECTOR_SUBPARTS over all vector types chosen below.  */
106 for (i = 0; i < nbbs; i++)
108 basic_block bb = bbs[i];
110 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
112 tree stmt = bsi_stmt (si);
114 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
117 if (vect_print_dump_info (REPORT_DETAILS))
119 fprintf (vect_dump, "==> examining statement: ");
120 print_generic_expr (vect_dump, stmt, TDF_SLIM);
123 gcc_assert (stmt_info);
124 /* skip stmts which do not need to be vectorized. */
125 if (!STMT_VINFO_RELEVANT_P (stmt_info)
126 && !STMT_VINFO_LIVE_P (stmt_info))
128 if (vect_print_dump_info (REPORT_DETAILS))
129 fprintf (vect_dump, "skip.");
/* A stmt whose type is already a vector mode cannot be re-vectorized.  */
133 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
135 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
137 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
138 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Reuse a previously recorded vectype if present; otherwise derive the
   scalar type from the data-ref, the MODIFY_EXPR lhs, or the stmt itself.  */
143 if (STMT_VINFO_VECTYPE (stmt_info))
145 vectype = STMT_VINFO_VECTYPE (stmt_info);
146 scalar_type = TREE_TYPE (vectype);
150 if (STMT_VINFO_DATA_REF (stmt_info))
152 TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
153 else if (TREE_CODE (stmt) == MODIFY_EXPR)
154 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
156 scalar_type = TREE_TYPE (stmt);
158 if (vect_print_dump_info (REPORT_DETAILS))
160 fprintf (vect_dump, "get vectype for scalar type: ");
161 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
164 vectype = get_vectype_for_scalar_type (scalar_type);
167 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
170 "not vectorized: unsupported data-type ");
171 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
175 STMT_VINFO_VECTYPE (stmt_info) = vectype;
178 if (vect_print_dump_info (REPORT_DETAILS))
180 fprintf (vect_dump, "vectype: ");
181 print_generic_expr (vect_dump, vectype, TDF_SLIM);
184 nunits = TYPE_VECTOR_SUBPARTS (vectype);
185 if (vect_print_dump_info (REPORT_DETAILS))
186 fprintf (vect_dump, "nunits = %d", nunits);
188 if (!vectorization_factor
189 || (nunits > vectorization_factor))
190 vectorization_factor = nunits;
195 /* TODO: Analyze cost. Decide if worth while to vectorize. */
/* VF of 0 or 1 means no usable vector type was found; nothing to do.  */
197 if (vectorization_factor <= 1)
199 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
200 fprintf (vect_dump, "not vectorized: unsupported data-type");
203 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
209 /* Function vect_analyze_operations.
211 Scan the loop stmts and make sure they are all vectorizable. */
214 vect_analyze_operations (loop_vec_info loop_vinfo)
/* NOTE(review): elided excerpt -- return type, braces and some statements
   are not visible; code kept byte-identical.  */
216 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
217 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
218 int nbbs = loop->num_nodes;
219 block_stmt_iterator si;
220 unsigned int vectorization_factor = 0;
224 stmt_vec_info stmt_info;
225 bool need_to_vectorize = false;
227 if (vect_print_dump_info (REPORT_DETAILS))
228 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* VF must already have been computed by vect_determine_vectorization_factor.  */
230 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
231 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
233 for (i = 0; i < nbbs; i++)
235 basic_block bb = bbs[i];
/* First pass over this block's PHIs: live or relevant scalar PHIs are
   not supported yet and cause rejection.  */
237 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
239 stmt_info = vinfo_for_stmt (phi);
240 if (vect_print_dump_info (REPORT_DETAILS))
242 fprintf (vect_dump, "examining phi: ");
243 print_generic_expr (vect_dump, phi, TDF_SLIM);
246 gcc_assert (stmt_info);
248 if (STMT_VINFO_LIVE_P (stmt_info))
250 /* FORNOW: not yet supported. */
251 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
252 fprintf (vect_dump, "not vectorized: value used after loop.");
256 if (STMT_VINFO_RELEVANT_P (stmt_info))
258 /* Most likely a reduction-like computation that is used
260 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
261 fprintf (vect_dump, "not vectorized: unsupported pattern.");
/* Second pass: every relevant/live stmt must match one of the
   vectorizable_* predicates below.  */
266 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
268 tree stmt = bsi_stmt (si);
269 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
271 if (vect_print_dump_info (REPORT_DETAILS))
273 fprintf (vect_dump, "==> examining statement: ");
274 print_generic_expr (vect_dump, stmt, TDF_SLIM);
277 gcc_assert (stmt_info);
279 /* skip stmts which do not need to be vectorized.
280 this is expected to include:
281 - the COND_EXPR which is the loop exit condition
282 - any LABEL_EXPRs in the loop
283 - computations that are used only for array indexing or loop
286 if (!STMT_VINFO_RELEVANT_P (stmt_info)
287 && !STMT_VINFO_LIVE_P (stmt_info))
289 if (vect_print_dump_info (REPORT_DETAILS))
290 fprintf (vect_dump, "irrelevant.");
294 if (STMT_VINFO_RELEVANT_P (stmt_info))
296 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
297 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
299 ok = (vectorizable_type_promotion (stmt, NULL, NULL)
300 || vectorizable_type_demotion (stmt, NULL, NULL)
301 || vectorizable_operation (stmt, NULL, NULL)
302 || vectorizable_assignment (stmt, NULL, NULL)
303 || vectorizable_load (stmt, NULL, NULL)
304 || vectorizable_store (stmt, NULL, NULL)
305 || vectorizable_condition (stmt, NULL, NULL));
309 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
312 "not vectorized: relevant stmt not supported: ");
313 print_generic_expr (vect_dump, stmt, TDF_SLIM);
317 need_to_vectorize = true;
320 if (STMT_VINFO_LIVE_P (stmt_info))
322 ok = vectorizable_reduction (stmt, NULL, NULL);
325 need_to_vectorize = true;
327 ok = vectorizable_live_operation (stmt, NULL, NULL);
331 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
334 "not vectorized: live stmt not supported: ");
335 print_generic_expr (vect_dump, stmt, TDF_SLIM);
343 /* TODO: Analyze cost. Decide if worth while to vectorize. */
345 /* All operations in the loop are either irrelevant (deal with loop
346 control, or dead), or only used outside the loop and can be moved
347 out of the loop (e.g. invariants, inductions). The loop can be
348 optimized away by scalar optimizations. We're better off not
349 touching this loop. */
350 if (!need_to_vectorize)
352 if (vect_print_dump_info (REPORT_DETAILS))
354 "All the computation can be taken out of the loop.");
355 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
357 "not vectorized: redundant loop. no profit to vectorize.");
361 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
362 && vect_print_dump_info (REPORT_DETAILS))
364 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
365 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Fewer known iterations than VF: not a single vector iteration fits.  */
367 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
368 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
370 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
371 fprintf (vect_dump, "not vectorized: iteration count too small.");
/* An epilog loop is needed when niters is unknown, not a multiple of VF,
   or when peeling for alignment is planned; verify it can be created.  */
375 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
376 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0
377 || LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
379 if (vect_print_dump_info (REPORT_DETAILS))
380 fprintf (vect_dump, "epilog loop required.");
381 if (!vect_can_advance_ivs_p (loop_vinfo))
383 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
385 "not vectorized: can't create epilog loop 1.");
388 if (!slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
390 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
392 "not vectorized: can't create epilog loop 2.");
401 /* Function exist_non_indexing_operands_for_use_p
403 USE is one of the uses attached to STMT. Check if USE is
404 used in STMT for anything other than indexing an array. */
407 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
/* NOTE(review): elided excerpt -- return type, braces and the final
   comparison against USE are not visible; code kept byte-identical.  */
410 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
412 /* USE corresponds to some operand in STMT. If there is no data
413 reference in STMT, then any operand that corresponds to USE
414 is not indexing an array. */
415 if (!STMT_VINFO_DATA_REF (stmt_info))
418 /* STMT has a data_ref. FORNOW this means that its of one of
422 (This should have been verified in analyze_data_refs).
424 'var' in the second case corresponds to a def, not a use,
425 so USE cannot correspond to any operands that are not used
428 Therefore, all we need to check is if STMT falls into the
429 first case, and whether var corresponds to USE. */
431 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
434 operand = TREE_OPERAND (stmt, 1);
436 if (TREE_CODE (operand) != SSA_NAME)
446 /* Function vect_analyze_scalar_cycles.
448 Examine the cross iteration def-use cycles of scalar variables, by
449 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
450 following: invariant, induction, reduction, unknown.
452 Some forms of scalar cycles are not yet supported.
454 Example1: reduction: (unsupported yet)
460 Example2: induction: (unsupported yet)
466 Note: the following loop *is* vectorizable:
472 even though it has a def-use cycle caused by the induction variable i:
474 loop: i_2 = PHI (i_0, i_1)
479 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
480 it does not need to be vectorized because it is only used for array
481 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
482 loop2 on the other hand is relevant (it is being written to memory).
486 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
/* NOTE(review): elided excerpt -- braces and some statements are not
   visible; code kept byte-identical.  */
489 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
490 basic_block bb = loop->header;
493 if (vect_print_dump_info (REPORT_DETAILS))
494 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
/* Only header PHIs carry cross-iteration scalar cycles; classify each.  */
496 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
498 tree access_fn = NULL;
499 tree def = PHI_RESULT (phi);
500 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
503 if (vect_print_dump_info (REPORT_DETAILS))
505 fprintf (vect_dump, "Analyze phi: ");
506 print_generic_expr (vect_dump, phi, TDF_SLIM);
509 /* Skip virtual phi's. The data dependences that are associated with
510 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
512 if (!is_gimple_reg (SSA_NAME_VAR (def)))
514 if (vect_print_dump_info (REPORT_DETAILS))
515 fprintf (vect_dump, "virtual phi. skip.");
/* Default classification until proven induction or reduction below.  */
519 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
521 /* Analyze the evolution function. */
523 access_fn = analyze_scalar_evolution (loop, def);
528 if (vect_print_dump_info (REPORT_DETAILS))
530 fprintf (vect_dump, "Access function of PHI: ");
531 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
534 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
536 if (vect_print_dump_info (REPORT_DETAILS))
537 fprintf (vect_dump, "Detected induction.");
538 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
542 /* TODO: handle invariant phis */
544 reduc_stmt = vect_is_simple_reduction (loop, phi);
547 if (vect_print_dump_info (REPORT_DETAILS))
548 fprintf (vect_dump, "Detected reduction.");
549 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
550 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
554 if (vect_print_dump_info (REPORT_DETAILS))
555 fprintf (vect_dump, "Unknown def-use cycle pattern.");
563 /* Function vect_insert_into_interleaving_chain.
565 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
568 vect_insert_into_interleaving_chain (struct data_reference *dra,
569 struct data_reference *drb)
/* NOTE(review): elided excerpt -- return type, braces and the loop header
   walking NEXT are not visible; code kept byte-identical.  The chain is a
   singly linked list of stmts ordered by DR_INIT, threaded through
   DR_GROUP_NEXT_DR and anchored at DR_GROUP_FIRST_DR.  */
571 tree prev, next, next_init;
572 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
573 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
575 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
576 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
579 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
580 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
/* Found the first node with a larger init: splice DRA in before it.  */
583 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
584 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
588 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
591 /* We got to the end of the list. Insert here. */
592 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
593 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL_TREE;
597 /* Function vect_update_interleaving_chain.
599 For two data-refs DRA and DRB that are a part of a chain interleaved data
600 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
602 There are four possible cases:
603 1. New stmts - both DRA and DRB are not a part of any chain:
606 2. DRB is a part of a chain and DRA is not:
607 no need to update FIRST_DR
608 no need to insert DRB
609 insert DRA according to init
610 3. DRA is a part of a chain and DRB is not:
611 if (init of FIRST_DR > init of DRB)
613 NEXT(FIRST_DR) = previous FIRST_DR
615 insert DRB according to its init
616 4. both DRA and DRB are in some interleaving chains:
617 choose the chain with the smallest init of FIRST_DR
618 insert the nodes of the second chain into the first one. */
621 vect_update_interleaving_chain (struct data_reference *drb,
622 struct data_reference *dra)
/* NOTE(review): elided excerpt -- return type, braces, early returns and
   some loop headers are not visible; code kept byte-identical.  */
624 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
625 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
626 tree next_init, init_dra_chain, init_drb_chain, first_a, first_b;
627 tree node, prev, next, node_init, first_stmt;
629 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
630 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
/* DRB (smaller init) becomes FIRST_DR of both; DRA follows it.  */
632 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
633 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
634 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
638 /* 2. DRB is a part of a chain and DRA is not. */
639 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
641 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
642 /* Insert DRA into the chain of DRB. */
643 vect_insert_into_interleaving_chain (dra, drb);
647 /* 3. DRA is a part of a chain and DRB is not. */
648 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
650 tree old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
651 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
655 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
657 /* DRB's init is smaller than the init of the stmt previously marked
658 as the first stmt of the interleaving chain of DRA. Therefore, we
659 update FIRST_STMT and put DRB in the head of the list. */
660 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
661 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
663 /* Update all the stmts in the list to point to the new FIRST_STMT. */
664 tmp = old_first_stmt;
667 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
668 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
673 /* Insert DRB in the list of DRA. */
674 vect_insert_into_interleaving_chain (drb, dra);
675 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
680 /* 4. both DRA and DRB are in some interleaving chains. */
681 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
682 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
/* Already in the same chain: nothing to merge.  */
683 if (first_a == first_b)
685 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
686 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
688 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
690 /* Insert the nodes of DRA chain into the DRB chain.
691 After inserting a node, continue from this node of the DRB chain (don't
692 start from the beginning. */
693 node = DR_GROUP_FIRST_DR (stmtinfo_a);
694 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
695 first_stmt = first_b;
699 /* Insert the nodes of DRB chain into the DRA chain.
700 After inserting a node, continue from this node of the DRA chain (don't
701 start from the beginning. */
702 node = DR_GROUP_FIRST_DR (stmtinfo_b);
703 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
704 first_stmt = first_a;
/* Merge loop: insert NODE in init order after PREV, then advance.  */
709 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
710 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
713 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
714 if (tree_int_cst_compare (next_init, node_init) > 0)
717 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
718 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
723 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
727 /* We got to the end of the list. Insert here. */
728 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
729 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL_TREE;
732 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
733 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
738 /* Function vect_equal_offsets.
740 Check if OFFSET1 and OFFSET2 are identical expressions. */
743 vect_equal_offsets (tree offset1, tree offset2)
/* NOTE(review): elided excerpt -- return type, braces and the early
   returns after the comparisons are not visible; code kept byte-identical.
   Structural equality: strip conversions, then compare pointer identity,
   tree codes, and recurse on both operands of binary expressions.  */
747 STRIP_NOPS (offset1);
748 STRIP_NOPS (offset2);
750 if (offset1 == offset2)
753 if (TREE_CODE (offset1) != TREE_CODE (offset2)
754 || !BINARY_CLASS_P (offset1)
755 || !BINARY_CLASS_P (offset2))
758 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
759 TREE_OPERAND (offset2, 0));
760 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
761 TREE_OPERAND (offset2, 1));
763 return (res0 && res1);
767 /* Function vect_check_interleaving.
769 Check if DRA and DRB are a part of interleaving. In case they are, insert
770 DRA and DRB in an interleaving chain. */
773 vect_check_interleaving (struct data_reference *dra,
774 struct data_reference *drb)
/* NOTE(review): elided excerpt -- return type, braces and early returns
   are not visible; code kept byte-identical.  */
776 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
778 /* Check that the data-refs have same first location (except init) and they
779 are both either store or load (not load and store). */
780 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
781 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
782 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
783 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
784 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
785 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
786 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
787 || DR_IS_READ (dra) != DR_IS_READ (drb))
791 1. data-refs are of the same type
792 2. their steps are equal
793 3. the step is greater than the difference between data-refs' inits */
794 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
795 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
797 if (type_size_a != type_size_b
798 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb)))
801 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
802 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
803 step = TREE_INT_CST_LOW (DR_STEP (dra));
807 /* If init_a == init_b + the size of the type * k, we have an interleaving,
808 and DRB is accessed before DRA. */
809 diff_mod_size = (init_a - init_b) % type_size_a;
/* Inits farther apart than the step cannot belong to one group.  */
811 if ((init_a - init_b) > step)
814 if (diff_mod_size == 0)
816 vect_update_interleaving_chain (drb, dra);
817 if (vect_print_dump_info (REPORT_DR_DETAILS))
819 fprintf (vect_dump, "Detected interleaving ");
820 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
821 fprintf (vect_dump, " and ");
822 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
829 /* If init_b == init_a + the size of the type * k, we have an
830 interleaving, and DRA is accessed before DRB. */
831 diff_mod_size = (init_b - init_a) % type_size_a;
833 if ((init_b - init_a) > step)
836 if (diff_mod_size == 0)
838 vect_update_interleaving_chain (dra, drb);
839 if (vect_print_dump_info (REPORT_DR_DETAILS))
841 fprintf (vect_dump, "Detected interleaving ");
842 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
843 fprintf (vect_dump, " and ");
844 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
852 /* Function vect_analyze_data_ref_dependence.
854 Return TRUE if there (might) exist a dependence between a memory-reference
855 DRA and a memory-reference DRB. */
858 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
859 loop_vec_info loop_vinfo,
860 bool check_interleaving)
/* NOTE(review): elided excerpt -- return type, braces and the return
   statements are not visible; code kept byte-identical.  */
863 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
864 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
865 struct data_reference *dra = DDR_A (ddr);
866 struct data_reference *drb = DDR_B (ddr);
867 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
868 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
869 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
870 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
871 lambda_vector dist_v;
872 unsigned int loop_depth;
874 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
876 /* Independent data accesses. */
/* Proven-independent refs are still candidates for one interleaving group.  */
877 if (check_interleaving)
878 vect_check_interleaving (dra, drb);
/* Read-after-read and self dependences are harmless.  */
882 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
885 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
887 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
890 "not vectorized: can't determine dependence between ");
891 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
892 fprintf (vect_dump, " and ");
893 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
898 if (DDR_NUM_DIST_VECTS (ddr) == 0)
900 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
902 fprintf (vect_dump, "not vectorized: bad dist vector for ");
903 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
904 fprintf (vect_dump, " and ");
905 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Inspect the distance component for this loop in every distance vector.  */
910 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
911 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
913 int dist = dist_v[loop_depth];
915 if (vect_print_dump_info (REPORT_DR_DETAILS))
916 fprintf (vect_dump, "dependence distance = %d.", dist);
918 /* Same loop iteration. */
919 if (dist % vectorization_factor == 0 && dra_size == drb_size)
921 /* Two references with distance zero have the same alignment. */
922 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
923 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
924 if (vect_print_dump_info (REPORT_ALIGNMENT))
925 fprintf (vect_dump, "accesses have the same alignment.");
926 if (vect_print_dump_info (REPORT_DR_DETAILS))
928 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
929 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
930 fprintf (vect_dump, " and ");
931 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
936 if (abs (dist) >= vectorization_factor)
938 /* Dependence distance does not create dependence, as far as vectorization
939 is concerned, in this case. */
940 if (vect_print_dump_info (REPORT_DR_DETAILS))
941 fprintf (vect_dump, "dependence distance >= VF.");
945 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
948 "not vectorized: possible dependence between data-refs ");
949 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
950 fprintf (vect_dump, " and ");
951 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
961 /* Function vect_check_dependences.
963 Return TRUE if there is a store-store or load-store dependence between
964 data-refs in DDR, otherwise return FALSE. */
967 vect_check_dependences (struct data_dependence_relation *ddr)
/* NOTE(review): elided excerpt -- return type, braces and return
   statements are not visible; code kept byte-identical.  */
969 struct data_reference *dra = DDR_A (ddr);
970 struct data_reference *drb = DDR_B (ddr);
972 if (DDR_ARE_DEPENDENT (ddr) == chrec_known || dra == drb)
973 /* Independent or same data accesses. */
/* Both reads: a read-read relation involves no store.  */
976 if (DR_IS_READ (dra) == DR_IS_READ (drb) && DR_IS_READ (dra))
980 if (vect_print_dump_info (REPORT_DR_DETAILS))
982 fprintf (vect_dump, "possible store or store/load dependence between ");
983 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
984 fprintf (vect_dump, " and ");
985 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
991 /* Function vect_analyze_data_ref_dependences.
993 Examine all the data references in the loop, and make sure there do not
994 exist any data dependences between them. */
997 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
/* NOTE(review): elided excerpt -- return type, braces and return
   statements are not visible; code kept byte-identical.  */
1000 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1001 struct data_dependence_relation *ddr;
1002 bool check_interleaving = true;
1004 if (vect_print_dump_info (REPORT_DETAILS))
1005 fprintf (vect_dump, "=== vect_analyze_dependences ===");
1007 /* We allow interleaving only if there are no store-store and load-store
1008 dependencies in the loop. */
/* First pass over all DDRs just decides the check_interleaving flag.  */
1009 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1011 if (vect_check_dependences (ddr))
1013 check_interleaving = false;
/* Second pass performs the actual dependence analysis per DDR.  */
1018 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1019 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, check_interleaving))
1026 /* Function vect_compute_data_ref_alignment
1028 Compute the misalignment of the data reference DR.
1031 1. If during the misalignment computation it is found that the data reference
1032 cannot be vectorized then false is returned.
1033 2. DR_MISALIGNMENT (DR) is defined.
1035 FOR NOW: No analysis is actually performed. Misalignment is calculated
1036 only for trivial cases. TODO. */
1039 vect_compute_data_ref_alignment (struct data_reference *dr)
/* NOTE(review): elided excerpt -- return type, braces, several conditions
   and return statements are not visible; code kept byte-identical.  */
1041 tree stmt = DR_STMT (dr);
1042 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1043 tree ref = DR_REF (dr);
1045 tree base, base_addr;
1048 tree aligned_to, alignment;
1050 if (vect_print_dump_info (REPORT_DETAILS))
1051 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1053 /* Initialize misalignment to unknown. */
1054 DR_MISALIGNMENT (dr) = -1;
1056 misalign = DR_OFFSET_MISALIGNMENT (dr);
1057 aligned_to = DR_ALIGNED_TO (dr);
1058 base_addr = DR_BASE_ADDRESS (dr);
1059 base = build_fold_indirect_ref (base_addr);
1060 vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Required alignment in bytes: that of the vector type.  */
1061 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1063 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
1066 if (vect_print_dump_info (REPORT_DETAILS))
1068 fprintf (vect_dump, "Unknown alignment for access: ");
1069 print_generic_expr (vect_dump, base, TDF_SLIM);
/* base_aligned: whether the base object/pointee is at least as aligned
   as the vector type requires.  */
1075 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
1077 || (TREE_CODE (base_addr) == SSA_NAME
1078 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
1079 TREE_TYPE (base_addr)))),
1081 base_aligned = true;
1083 base_aligned = false;
1087 /* Do not change the alignment of global variables if
1088 flag_section_anchors is enabled. */
1089 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
1090 || (TREE_STATIC (base) && flag_section_anchors))
1092 if (vect_print_dump_info (REPORT_DETAILS))
1094 fprintf (vect_dump, "can't force alignment of ref: ");
1095 print_generic_expr (vect_dump, ref, TDF_SLIM);
1100 /* Force the alignment of the decl.
1101 NOTE: This is the only change to the code we make during
1102 the analysis phase, before deciding to vectorize the loop. */
1103 if (vect_print_dump_info (REPORT_DETAILS))
1104 fprintf (vect_dump, "force alignment");
1105 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1106 DECL_USER_ALIGN (base) = 1;
1109 /* At this point we assume that the base is aligned. */
1110 gcc_assert (base_aligned
1111 || (TREE_CODE (base) == VAR_DECL
1112 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1114 /* Modulo alignment. */
1115 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1117 if (!host_integerp (misalign, 1))
1119 /* Negative or overflowed misalignment value. */
1120 if (vect_print_dump_info (REPORT_DETAILS))
1121 fprintf (vect_dump, "unexpected misalign value");
1125 DR_MISALIGNMENT (dr) = TREE_INT_CST_LOW (misalign);
1127 if (vect_print_dump_info (REPORT_DETAILS))
1129 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
1130 print_generic_expr (vect_dump, ref, TDF_SLIM);
1137 /* Function vect_compute_data_refs_alignment
1139 Compute the misalignment of data references in the loop.
1140 Return FALSE if a data reference is found that cannot be vectorized. */
1143 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
/* NOTE(review): elided excerpt -- return type, braces and return
   statements are not visible; code kept byte-identical.  Simply applies
   vect_compute_data_ref_alignment to every recorded data-ref.  */
1145 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1146 struct data_reference *dr;
1149 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1150 if (!vect_compute_data_ref_alignment (dr))
1157 /* Function vect_update_misalignment_for_peel
1159 DR - the data reference whose misalignment is to be adjusted.
1160 DR_PEEL - the data reference whose misalignment is being made
1161 zero in the vector loop by the peel.
1162 NPEEL - the number of iterations in the peel loop if the misalignment
1163 of DR_PEEL is known at compile time. */
1166 vect_update_misalignment_for_peel (struct data_reference *dr,
1167 struct data_reference *dr_peel, int npeel)
/* NOTE(review): elided excerpt -- braces and return statements are not
   visible; code kept byte-identical.  */
1170 VEC(dr_p,heap) *same_align_drs;
1171 struct data_reference *current_dr;
1172 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1173 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1174 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1175 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1177 /* For interleaved data accesses the step in the loop must be multiplied by
1178 the size of the interleaving group. */
1179 if (DR_GROUP_FIRST_DR (stmt_info))
1180 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
1181 if (DR_GROUP_FIRST_DR (peel_stmt_info))
1182 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
/* If DR and DR_PEEL have the same known element-relative misalignment,
   the peel that aligns DR_PEEL also aligns DR.  */
1184 if (known_alignment_for_access_p (dr)
1185 && known_alignment_for_access_p (dr_peel)
1186 && (DR_MISALIGNMENT (dr) / dr_size ==
1187 DR_MISALIGNMENT (dr_peel) / dr_peel_size))
1189 DR_MISALIGNMENT (dr) = 0;
1193 /* It can be assumed that the data refs with the same alignment as dr_peel
1194 are aligned in the vector loop. */
1196 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1197 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
1199 if (current_dr != dr)
1201 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1202 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1203 DR_MISALIGNMENT (dr) = 0;
/* Otherwise, with both misalignments known, shift DR's misalignment by the
   peeled iterations, modulo the vector word size.  */
1207 if (known_alignment_for_access_p (dr)
1208 && known_alignment_for_access_p (dr_peel))
1210 DR_MISALIGNMENT (dr) += npeel * dr_size;
1211 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
1215 if (vect_print_dump_info (REPORT_DETAILS))
1216 fprintf (vect_dump, "Setting misalignment to -1.");
1217 DR_MISALIGNMENT (dr) = -1;
1221 /* Function vect_verify_datarefs_alignment
1223 Return TRUE if all data references in the loop can be
1224 handled with respect to alignment. */
1227 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
1229 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1230 struct data_reference *dr;
1231 enum dr_alignment_support supportable_dr_alignment;
1234 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1236 tree stmt = DR_STMT (dr);
1237 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1239 /* For interleaving, only the alignment of the first access matters. */
1240 if (DR_GROUP_FIRST_DR (stmt_info)
1241 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Ask the target whether this (possibly misaligned) access can be
   vectorized; zero (unsupported) fails the whole loop.  */
1244 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1245 if (!supportable_dr_alignment)
1247 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1249 if (DR_IS_READ (dr))
1251 "not vectorized: unsupported unaligned load.");
1254 "not vectorized: unsupported unaligned store.");
1258 if (supportable_dr_alignment != dr_aligned
1259 && vect_print_dump_info (REPORT_ALIGNMENT))
1260 fprintf (vect_dump, "Vectorizing an unaligned access.");
1266 /* Function vect_enhance_data_refs_alignment
1268 This pass will use loop versioning and loop peeling in order to enhance
1269 the alignment of data references in the loop.
1271 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1272 original loop is to be vectorized; Any other loops that are created by
1273 the transformations performed in this pass - are not supposed to be
1274 vectorized. This restriction will be relaxed.
1276 This pass will require a cost model to guide it whether to apply peeling
1277 or versioning or a combination of the two. For example, the scheme that
1278 intel uses when given a loop with several memory accesses, is as follows:
1279 choose one memory access ('p') which alignment you want to force by doing
1280 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1281 other accesses are not necessarily aligned, or (2) use loop versioning to
1282 generate one loop in which all accesses are aligned, and another loop in
1283 which only 'p' is necessarily aligned.
1285 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1286 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1287 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1289 Devising a cost model is the most critical aspect of this work. It will
1290 guide us on which access to peel for, whether to use loop versioning, how
1291 many versions to create, etc. The cost model will probably consist of
1292 generic considerations as well as target specific considerations (on
1293 powerpc for example, misaligned stores are more painful than misaligned
1296 Here are the general steps involved in alignment enhancements:
1298 -- original loop, before alignment analysis:
1299 for (i=0; i<N; i++){
1300 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1301 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1304 -- After vect_compute_data_refs_alignment:
1305 for (i=0; i<N; i++){
1306 x = q[i]; # DR_MISALIGNMENT(q) = 3
1307 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1310 -- Possibility 1: we do loop versioning:
1312 for (i=0; i<N; i++){ # loop 1A
1313 x = q[i]; # DR_MISALIGNMENT(q) = 3
1314 p[i] = y; # DR_MISALIGNMENT(p) = 0
1318 for (i=0; i<N; i++){ # loop 1B
1319 x = q[i]; # DR_MISALIGNMENT(q) = 3
1320 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1324 -- Possibility 2: we do loop peeling:
1325 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1329 for (i = 3; i < N; i++){ # loop 2A
1330 x = q[i]; # DR_MISALIGNMENT(q) = 0
1331 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1334 -- Possibility 3: combination of loop peeling and versioning:
1335 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1340 for (i = 3; i<N; i++){ # loop 3A
1341 x = q[i]; # DR_MISALIGNMENT(q) = 0
1342 p[i] = y; # DR_MISALIGNMENT(p) = 0
1346 for (i = 3; i<N; i++){ # loop 3B
1347 x = q[i]; # DR_MISALIGNMENT(q) = 0
1348 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1352 These loops are later passed to loop_transform to be vectorized. The
1353 vectorizer will use the alignment information to guide the transformation
1354 (whether to generate regular loads/stores, or with special handling for
1358 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1360 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1361 enum dr_alignment_support supportable_dr_alignment;
1362 struct data_reference *dr0 = NULL;
1363 struct data_reference *dr;
1365 bool do_peeling = false;
1366 bool do_versioning = false;
1369 stmt_vec_info stmt_info;
1371 if (vect_print_dump_info (REPORT_DETAILS))
1372 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1374 /* While cost model enhancements are expected in the future, the high level
1375 view of the code at this time is as follows:
1377 A) If there is a misaligned write then see if peeling to align this write
1378 can make all data references satisfy vect_supportable_dr_alignment.
1379 If so, update data structures as needed and return true. Note that
1380 at this time vect_supportable_dr_alignment is known to return false
1381 for a misaligned write.
1383 B) If peeling wasn't possible and there is a data reference with an
1384 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1385 then see if loop versioning checks can be used to make all data
1386 references satisfy vect_supportable_dr_alignment. If so, update
1387 data structures as needed and return true.
1389 C) If neither peeling nor versioning were successful then return false if
1390 any data reference does not satisfy vect_supportable_dr_alignment.
1392 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1394 Note, Possibility 3 above (which is peeling and versioning together) is not
1395 being done at this time. */
1397 /* (1) Peeling to force alignment. */
1399 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1401 + How many accesses will become aligned due to the peeling
1402 - How many accesses will become unaligned due to the peeling,
1403 and the cost of misaligned accesses.
1404 - The cost of peeling (the extra runtime checks, the increase
1407 The scheme we use FORNOW: peel to force the alignment of the first
1408 misaligned store in the loop.
1409 Rationale: misaligned stores are not yet supported.
1411 TODO: Use a cost model. */
/* Scan the data refs looking for the first misaligned store; that
   access (dr0) becomes the peeling candidate.  */
1413 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1415 stmt = DR_STMT (dr);
1416 stmt_info = vinfo_for_stmt (stmt);
1418 /* For interleaving, only the alignment of the first access
1420 if (DR_GROUP_FIRST_DR (stmt_info)
1421 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1424 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1426 if (DR_GROUP_FIRST_DR (stmt_info))
1428 /* For interleaved access we peel only if number of iterations in
1429 the prolog loop ({VF - misalignment}), is a multiple of the
1430 number of the interleaved accesses. */
1431 int elem_size, mis_in_elements;
1432 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1434 /* FORNOW: handle only known alignment. */
1435 if (!known_alignment_for_access_p (dr))
1441 elem_size = UNITS_PER_SIMD_WORD / vf;
1442 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1444 if ((vf - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1456 /* Often peeling for alignment will require peeling for loop-bound, which in
1457 turn requires that we know how to adjust the loop ivs after the loop. */
1458 if (!vect_can_advance_ivs_p (loop_vinfo))
1466 if (known_alignment_for_access_p (dr0))
1468 /* Since it's known at compile time, compute the number of iterations
1469 in the peeled loop (the peeling factor) for use in updating
1470 DR_MISALIGNMENT values. The peeling factor is the vectorization
1471 factor minus the misalignment as an element count. */
1472 mis = DR_MISALIGNMENT (dr0);
1473 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1474 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1476 /* For interleaved data access every iteration accesses all the
1477 members of the group, therefore we divide the number of iterations
1478 by the group size. */
1479 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1480 if (DR_GROUP_FIRST_DR (stmt_info))
1481 npeel /= DR_GROUP_SIZE (stmt_info);
1483 if (vect_print_dump_info (REPORT_DETAILS))
1484 fprintf (vect_dump, "Try peeling by %d", npeel);
1487 /* Ensure that all data refs can be vectorized after the peel. */
1488 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1490 int save_misalignment;
1495 stmt = DR_STMT (dr);
1496 stmt_info = vinfo_for_stmt (stmt);
1497 /* For interleaving, only the alignment of the first access
1499 if (DR_GROUP_FIRST_DR (stmt_info)
1500 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
/* Tentatively apply the peel's effect on this DR's misalignment,
   query supportability, then restore the saved value.  */
1503 save_misalignment = DR_MISALIGNMENT (dr);
1504 vect_update_misalignment_for_peel (dr, dr0, npeel);
1505 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1506 DR_MISALIGNMENT (dr) = save_misalignment;
1508 if (!supportable_dr_alignment)
1517 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1518 If the misalignment of DR_i is identical to that of dr0 then set
1519 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1520 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1521 by the peeling factor times the element size of DR_i (MOD the
1522 vectorization factor times the size). Otherwise, the
1523 misalignment of DR_i must be set to unknown. */
1524 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1526 vect_update_misalignment_for_peel (dr, dr0, npeel);
1528 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1529 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1530 DR_MISALIGNMENT (dr0) = 0;
1531 if (vect_print_dump_info (REPORT_ALIGNMENT))
1532 fprintf (vect_dump, "Alignment of access forced using peeling.");
1534 if (vect_print_dump_info (REPORT_DETAILS))
1535 fprintf (vect_dump, "Peeling for alignment will be applied.");
1537 stat = vect_verify_datarefs_alignment (loop_vinfo);
1544 /* (2) Versioning to force alignment. */
1546 /* Try versioning if:
1547 1) flag_tree_vect_loop_version is TRUE
1548 2) optimize_size is FALSE
1549 3) there is at least one unsupported misaligned data ref with an unknown
1551 4) all misaligned data refs with a known misalignment are supported, and
1552 5) the number of runtime alignment checks is within reason. */
1554 do_versioning = flag_tree_vect_loop_version && (!optimize_size);
1558 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1560 stmt = DR_STMT (dr);
1561 stmt_info = vinfo_for_stmt (stmt);
1563 /* For interleaving, only the alignment of the first access
1565 if (aligned_access_p (dr)
1566 || (DR_GROUP_FIRST_DR (stmt_info)
1567 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1570 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1572 if (!supportable_dr_alignment)
/* Versioning only helps unknown misalignments, and only while the
   number of runtime checks stays under the parameter limit.  */
1578 if (known_alignment_for_access_p (dr)
1579 || VEC_length (tree,
1580 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1581 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_CHECKS))
1583 do_versioning = false;
1587 stmt = DR_STMT (dr);
1588 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1589 gcc_assert (vectype);
1591 /* The rightmost bits of an aligned address must be zeros.
1592 Construct the mask needed for this test. For example,
1593 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1594 mask must be 15 = 0xf. */
1595 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1597 /* FORNOW: use the same mask to test all potentially unaligned
1598 references in the loop. The vectorizer currently supports
1599 a single vector size, see the reference to
1600 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1601 vectorization factor is computed. */
1602 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1603 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1604 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1605 VEC_safe_push (tree, heap,
1606 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1611 /* Versioning requires at least one misaligned data reference. */
1612 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1613 do_versioning = false;
1614 else if (!do_versioning)
1615 VEC_truncate (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1620 VEC(tree,heap) *may_misalign_stmts
1621 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1624 /* It can now be assumed that the data references in the statements
1625 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1626 of the loop being vectorized. */
1627 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, stmt); i++)
1629 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1630 dr = STMT_VINFO_DATA_REF (stmt_info);
1631 DR_MISALIGNMENT (dr) = 0;
1632 if (vect_print_dump_info (REPORT_ALIGNMENT))
1633 fprintf (vect_dump, "Alignment of access forced using versioning.");
1636 if (vect_print_dump_info (REPORT_DETAILS))
1637 fprintf (vect_dump, "Versioning for alignment will be applied.");
1639 /* Peeling and versioning can't be done together at this time. */
1640 gcc_assert (! (do_peeling && do_versioning));
1642 stat = vect_verify_datarefs_alignment (loop_vinfo);
1647 /* This point is reached if neither peeling nor versioning is being done. */
1648 gcc_assert (! (do_peeling || do_versioning));
1650 stat = vect_verify_datarefs_alignment (loop_vinfo);
1655 /* Function vect_analyze_data_refs_alignment
1657 Analyze the alignment of the data-references in the loop.
1658 Return FALSE if a data reference is found that cannot be vectorized. */
1661 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1663 if (vect_print_dump_info (REPORT_DETAILS))
1664 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
/* Delegate the per-reference work; failure here means some DR's
   misalignment could not be computed.  */
1666 if (!vect_compute_data_refs_alignment (loop_vinfo))
1668 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1670 "not vectorized: can't calculate alignment for data ref.");
1678 /* Function vect_analyze_data_ref_access.
1680 Analyze the access pattern of the data-reference DR. For now, a data access
1681 has to be consecutive to be considered vectorizable. */
1684 vect_analyze_data_ref_access (struct data_reference *dr)
1686 tree step = DR_STEP (dr);
1687 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1688 tree scalar_type = TREE_TYPE (DR_REF (dr));
1689 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1690 tree stmt = DR_STMT (dr);
1691 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1692 interleaving group (including gaps). */
1693 HOST_WIDE_INT stride = dr_step / type_size;
1697 if (vect_print_dump_info (REPORT_DETAILS))
1698 fprintf (vect_dump, "bad data-ref access");
/* Consecutive access: STEP equals the scalar type size
   (tree_int_cst_compare returns 0 on equality).  */
1703 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1705 /* Mark that it is not interleaving. */
1706 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL_TREE;
1710 /* Not consecutive access is possible only if it is a part of interleaving. */
1711 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1713 /* Check if this DR is a part of interleaving, and is a single
1714 element of the group that is accessed in the loop. */
1716 /* Gaps are supported only for loads. STEP must be a multiple of the type
1717 size. The size of the group must be a power of 2. */
1719 && (dr_step % type_size) == 0
1721 && exact_log2 (stride) != -1)
1723 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1724 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1725 if (vect_print_dump_info (REPORT_DR_DETAILS))
1727 fprintf (vect_dump, "Detected single element interleaving %d ",
1728 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1729 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1730 fprintf (vect_dump, " step ");
1731 print_generic_expr (vect_dump, step, TDF_SLIM);
1735 if (vect_print_dump_info (REPORT_DETAILS))
1736 fprintf (vect_dump, "not consecutive access");
/* This stmt heads an interleaving chain: validate every member.  */
1740 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1742 /* First stmt in the interleaving chain. Check the chain. */
1743 tree next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1744 struct data_reference *data_ref = dr;
1745 unsigned int count = 1;
1747 tree prev_init = DR_INIT (data_ref);
1749 HOST_WIDE_INT diff, count_in_bytes;
1753 /* Skip same data-refs. In case that two or more stmts share data-ref
1754 (supported only for loads), we vectorize only the first stmt, and
1755 the rest get their vectorized loads from the first one. */
1756 if (!tree_int_cst_compare (DR_INIT (data_ref),
1757 DR_INIT (STMT_VINFO_DATA_REF (
1758 vinfo_for_stmt (next)))))
1760 /* For load use the same data-ref load. (We check in
1761 vect_check_dependences() that there are no two stores to the
1763 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1766 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1771 /* Check that all the accesses have the same STEP. */
1772 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1773 if (tree_int_cst_compare (step, next_step))
1775 if (vect_print_dump_info (REPORT_DETAILS))
1776 fprintf (vect_dump, "not consecutive access in interleaving");
1780 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1781 /* Check that the distance between two accesses is equal to the type
1782 size. Otherwise, we have gaps. */
1783 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1784 - TREE_INT_CST_LOW (prev_init)) / type_size;
1785 if (!DR_IS_READ (data_ref) && diff != 1)
1787 if (vect_print_dump_info (REPORT_DETAILS))
1788 fprintf (vect_dump, "interleaved store with gaps");
1791 /* Store the gap from the previous member of the group. If there is no
1792 gap in the access, DR_GROUP_GAP is always 1. */
1793 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1795 prev_init = DR_INIT (data_ref);
1796 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1797 /* Count the number of data-refs in the chain. */
1801 /* COUNT is the number of accesses found, we multiply it by the size of
1802 the type to get COUNT_IN_BYTES. */
1803 count_in_bytes = type_size * count;
1804 /* Check the size of the interleaving is not greater than STEP. */
1805 if (dr_step < count_in_bytes)
1807 if (vect_print_dump_info (REPORT_DETAILS))
1809 fprintf (vect_dump, "interleaving size is greater than step for ");
1810 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1815 /* Check that STEP is a multiple of type size. */
1816 if ((dr_step % type_size) != 0)
1818 if (vect_print_dump_info (REPORT_DETAILS))
1820 fprintf (vect_dump, "step is not a multiple of type size: step ");
1821 print_generic_expr (vect_dump, step, TDF_SLIM);
1822 fprintf (vect_dump, " size ");
1823 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1829 /* FORNOW: we handle only interleaving that is a power of 2. */
1830 if (exact_log2 (stride) == -1)
1832 if (vect_print_dump_info (REPORT_DETAILS))
1833 fprintf (vect_dump, "interleaving is not a power of 2");
1836 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1842 /* Function vect_analyze_data_ref_accesses.
1844 Analyze the access pattern of all the data references in the loop.
1846 FORNOW: the only access pattern that is considered vectorizable is a
1847 simple step 1 (consecutive) access.
1849 FORNOW: handle only arrays and pointer accesses. */
1852 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1855 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1856 struct data_reference *dr;
1858 if (vect_print_dump_info (REPORT_DETAILS))
1859 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
/* Any single unanalyzable access pattern fails the whole loop.  */
1861 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1862 if (!vect_analyze_data_ref_access (dr))
1864 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1865 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1873 /* Function vect_analyze_data_refs.
1875 Find all the data references in the loop.
1877 The general structure of the analysis of data refs in the vectorizer is as
1879 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1880 find and analyze all data-refs in the loop and their dependences.
1881 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1882 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1883 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1888 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1890 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1892 VEC (data_reference_p, heap) *datarefs;
1893 struct data_reference *dr;
1896 if (vect_print_dump_info (REPORT_DETAILS))
1897 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
/* Collect data references and dependence relations for the loop into
   the loop_vinfo fields.  */
1899 compute_data_dependences_for_loop (loop, true,
1900 &LOOP_VINFO_DATAREFS (loop_vinfo),
1901 &LOOP_VINFO_DDRS (loop_vinfo));
1903 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1904 from stmt_vec_info struct to DR and vectype. */
1905 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1907 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1910 stmt_vec_info stmt_info;
1912 if (!dr || !DR_REF (dr))
1914 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1915 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1919 /* Update DR field in stmt_vec_info struct. */
1920 stmt = DR_STMT (dr);
1921 stmt_info = vinfo_for_stmt (stmt);
/* FORNOW: a stmt may own at most one data reference.  */
1923 if (STMT_VINFO_DATA_REF (stmt_info))
1925 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1928 "not vectorized: more than one data ref in stmt: ");
1929 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1933 STMT_VINFO_DATA_REF (stmt_info) = dr;
1935 /* Check that analysis of the data-ref succeeded. */
1936 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1941 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1942 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1946 if (!DR_MEMTAG (dr))
1948 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1950 fprintf (vect_dump, "not vectorized: no memory tag for ");
1951 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1956 /* Set vectype for STMT. */
1957 scalar_type = TREE_TYPE (DR_REF (dr));
1958 STMT_VINFO_VECTYPE (stmt_info) =
1959 get_vectype_for_scalar_type (scalar_type);
1960 if (!STMT_VINFO_VECTYPE (stmt_info))
1962 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1965 "not vectorized: no vectype for stmt: ");
1966 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1967 fprintf (vect_dump, " scalar_type: ");
1968 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1978 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1980 /* Function vect_mark_relevant.
1982 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1985 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
1986 enum vect_relevant relevant, bool live_p)
1988 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1989 enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
1990 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
1992 if (vect_print_dump_info (REPORT_DETAILS))
1993 fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
1995 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1999 /* This is the last stmt in a sequence that was detected as a
2000 pattern that can potentially be vectorized. Don't mark the stmt
2001 as relevant/live because it's not going to be vectorized.
2002 Instead mark the pattern-stmt that replaces it. */
2003 if (vect_print_dump_info (REPORT_DETAILS))
2004 fprintf (vect_dump, "last stmt in pattern. don't mark relevant/live.");
2005 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2006 stmt_info = vinfo_for_stmt (pattern_stmt);
2007 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
2008 save_relevant = STMT_VINFO_RELEVANT (stmt_info);
2009 save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2010 stmt = pattern_stmt;
/* Relevance only ever increases (monotonic lattice); keep the max.  */
2013 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2014 if (relevant > STMT_VINFO_RELEVANT (stmt_info))
2015 STMT_VINFO_RELEVANT (stmt_info) = relevant;
2017 if (TREE_CODE (stmt) == PHI_NODE)
2018 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2019 or live will fail vectorization later on. */
/* Nothing changed -> no need to reprocess this stmt.  */
2022 if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
2023 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2025 if (vect_print_dump_info (REPORT_DETAILS))
2026 fprintf (vect_dump, "already marked relevant/live.");
2030 VEC_safe_push (tree, heap, *worklist, stmt);
2034 /* Function vect_stmt_relevant_p.
2036 Return true if STMT in loop that is represented by LOOP_VINFO is
2037 "relevant for vectorization".
2039 A stmt is considered "relevant for vectorization" if:
2040 - it has uses outside the loop.
2041 - it has vdefs (it alters memory).
2042 - control stmts in the loop (except for the exit condition).
2044 CHECKME: what other side effects would the vectorizer allow? */
2047 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2048 enum vect_relevant *relevant, bool *live_p)
2050 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2051 ssa_op_iter op_iter;
2052 imm_use_iterator imm_iter;
2053 use_operand_p use_p;
2054 def_operand_p def_p;
2056 *relevant = vect_unused_in_loop;
2059 /* cond stmt other than loop exit cond. */
2060 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2061 *relevant = vect_used_in_loop;
2063 /* changing memory. */
2064 if (TREE_CODE (stmt) != PHI_NODE)
2065 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2067 if (vect_print_dump_info (REPORT_DETAILS))
2068 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2069 *relevant = vect_used_in_loop;
2072 /* uses outside the loop. */
2073 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2075 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2077 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2078 if (!flow_bb_inside_loop_p (loop, bb))
2080 if (vect_print_dump_info (REPORT_DETAILS))
2081 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2083 /* We expect all such uses to be in the loop exit phis
2084 (because of loop closed form) */
2085 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2086 gcc_assert (bb == single_exit (loop)->dest);
/* Relevant if it is either live (used after the loop) or relevant
   inside the loop.  */
2093 return (*live_p || *relevant);
2097 /* Function vect_mark_stmts_to_be_vectorized.
2099 Not all stmts in the loop need to be vectorized. For example:
2108 Stmt 1 and 3 do not need to be vectorized, because loop control and
2109 addressing of vectorized data-refs are handled differently.
2111 This pass detects such stmts. */
2114 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2116 VEC(tree,heap) *worklist;
2117 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2118 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2119 unsigned int nbbs = loop->num_nodes;
2120 block_stmt_iterator si;
2125 stmt_vec_info stmt_vinfo;
2129 enum vect_relevant relevant;
2131 enum vect_def_type dt;
2133 if (vect_print_dump_info (REPORT_DETAILS))
2134 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2136 worklist = VEC_alloc (tree, heap, 64);
2138 /* 1. Init worklist. */
/* Seed the worklist with all inherently-relevant phis and stmts.  */
2141 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2143 if (vect_print_dump_info (REPORT_DETAILS))
2145 fprintf (vect_dump, "init: phi relevant? ");
2146 print_generic_expr (vect_dump, phi, TDF_SLIM);
2149 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
2150 vect_mark_relevant (&worklist, phi, relevant, live_p);
2153 for (i = 0; i < nbbs; i++)
2156 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2158 stmt = bsi_stmt (si);
2160 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "init: stmt relevant? ");
2163 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2166 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
2167 vect_mark_relevant (&worklist, stmt, relevant, live_p);
2172 /* 2. Process_worklist */
/* Fixed-point propagation: pop a stmt, propagate its relevance/liveness
   to the defining stmts of its uses, which may in turn be pushed.  */
2174 while (VEC_length (tree, worklist) > 0)
2176 stmt = VEC_pop (tree, worklist);
2178 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "worklist: examine stmt: ");
2181 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2184 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2185 in the loop, mark the stmt that defines it (DEF_STMT) as
2186 relevant/irrelevant and live/dead according to the liveness and
2187 relevance properties of STMT.
2190 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2192 ann = stmt_ann (stmt);
2193 stmt_vinfo = vinfo_for_stmt (stmt);
2195 relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
2196 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2198 /* Generally, the liveness and relevance properties of STMT are
2199 propagated to the DEF_STMTs of its USEs:
2200 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2201 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
2206 If USE is used only for address computations (e.g. array indexing),
2207 which does not need to be directly vectorized, then the
2208 liveness/relevance of the respective DEF_STMT is left unchanged.
2211 If STMT has been identified as defining a reduction variable, then
2214 The last use of STMT is the reduction-variable, which is defined
2215 by a loop-header-phi. We don't want to mark the phi as live or
2216 relevant (because it does not need to be vectorized, it is handled
2217 as part of the vectorization of the reduction), so in this case we
2218 skip the call to vect_mark_relevant.
2220 The rest of the uses of STMT are defined in the loop body. For
2221 the def_stmt of these uses we want to set liveness/relevance
2223 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2224 STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
2225 because even though STMT is classified as live (since it defines a
2226 value that is used across loop iterations) and irrelevant (since it
2227 is not used inside the loop), it will be vectorized, and therefore
2228 the corresponding DEF_STMTs need to marked as relevant.
2229 We distinguish between two kinds of relevant stmts - those that are
2230 used by a reduction computation, and those that are (also) used by a regular computation. This allows us later on to identify stmts
2231 that are used solely by a reduction, and therefore the order of
2232 the results that they produce does not have to be kept.
2236 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2238 gcc_assert (relevant == vect_unused_in_loop && live_p);
2239 relevant = vect_used_by_reduction;
2243 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2245 /* case 1: we are only interested in uses that need to be vectorized.
2246 Uses that are used for address computation are not considered
2249 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2252 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2254 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2255 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2256 VEC_free (tree, heap, worklist);
2260 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2263 if (vect_print_dump_info (REPORT_DETAILS))
2265 fprintf (vect_dump, "worklist: examine use %d: ", i);
2266 print_generic_expr (vect_dump, use, TDF_SLIM);
2269 bb = bb_for_stmt (def_stmt);
2270 if (!flow_bb_inside_loop_p (loop, bb))
2273 /* case 2.1: the reduction-use does not mark the defining-phi
2275 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2276 && TREE_CODE (def_stmt) == PHI_NODE)
2279 vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
2281 } /* while worklist */
2283 VEC_free (tree, heap, worklist);
2288 /* Function vect_can_advance_ivs_p
2290 In case the number of iterations that LOOP iterates is unknown at compile
2291 time, an epilog loop will be generated, and the loop induction variables
2292 (IVs) will be "advanced" to the value they are supposed to take just before
2293 the epilog loop. Here we check that the access function of the loop IVs
2294 and the expression that represents the loop bound are simple enough.
2295 These restrictions will be relaxed in the future. */
2298 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2300 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2301 basic_block bb = loop->header;
2304 /* Analyze phi functions of the loop header. */
2306 if (vect_print_dump_info (REPORT_DETAILS))
2307 fprintf (vect_dump, "vect_can_advance_ivs_p:");
/* Walk every phi in the loop header.  Only genuine scalar IV phis need an
   analyzable evolution; virtual (memory) phis and reduction phis are
   deliberately skipped below.  NOTE(review): the failure returns for each
   check are elided from this view -- presumably each unmet condition makes
   the function answer "cannot advance"; confirm against the full source.  */
2309 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2311 tree access_fn = NULL;
2312 tree evolution_part;
2314 if (vect_print_dump_info (REPORT_DETAILS))
2316 fprintf (vect_dump, "Analyze phi: ");
2317 print_generic_expr (vect_dump, phi, TDF_SLIM);
2320 /* Skip virtual phi's. The data dependences that are associated with
2321 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2323 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2325 if (vect_print_dump_info (REPORT_DETAILS))
2326 fprintf (vect_dump, "virtual phi. skip.")
2330 /* Skip reduction phis. */
2332 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2334 if (vect_print_dump_info (REPORT_DETAILS))
2335 fprintf (vect_dump, "reduc phi. skip.")
2339 /* Analyze the evolution function. */
/* Ask the scalar-evolution machinery for the IV's access function,
   fully instantiated with respect to this loop.  */
2341 access_fn = instantiate_parameters
2342 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2346 if (vect_print_dump_info (REPORT_DETAILS))
2347 fprintf (vect_dump, "No Access function.")
2351 if (vect_print_dump_info (REPORT_DETAILS))
2353 fprintf (vect_dump, "Access function of PHI: ");
2354 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* The IV's evolution inside this loop must exist (non-NULL) and must not
   itself be a chrec -- see the FORNOW restriction below on evolutions that
   are polynomials of degree >= 2.  */
2357 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2359 if (evolution_part == NULL_TREE)
2361 if (vect_print_dump_info (REPORT_DETAILS))
2362 fprintf (vect_dump, "No evolution.")
2366 /* FORNOW: We do not transform initial conditions of IVs
2367 which evolution functions are a polynomial of degree >= 2. */
2369 if (tree_is_chrec (evolution_part))
2377 /* Function vect_get_loop_niters.
2379 Determine how many iterations the loop is executed.
2380 If an expression that represents the number of iterations
2381 can be constructed, place it in NUMBER_OF_ITERATIONS.
2382 Return the loop exit condition. */
2385 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2389 if (vect_print_dump_info (REPORT_DETAILS))
2390 fprintf (vect_dump, "=== get_loop_niters ===");
/* Let the generic loop analysis compute an iteration-count expression.  */
2392 niters = number_of_iterations_in_loop (loop);
/* *NUMBER_OF_ITERATIONS is written only when the analysis produced a
   usable expression -- i.e. it is neither unknown (NULL_TREE) nor the
   "don't know" chrec sentinel.  Otherwise the out-parameter is left
   untouched, so callers must initialize it before calling.  */
2394 if (niters != NULL_TREE
2395 && niters != chrec_dont_know)
2397 *number_of_iterations = niters;
2399 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "==> get_loop_niters:" );
2402 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
/* The exit condition is returned regardless of whether the iteration
   count could be determined.  */
2406 return get_loop_exit_condition (loop);
2410 /* Function vect_analyze_loop_form.
2412 Verify the following restrictions (some may be relaxed in the future):
2413 - it's an inner-most loop
2414 - number of BBs = 2 (which are the loop header and the latch)
2415 - the loop has a pre-header
2416 - the loop has a single entry and exit
2417 - the loop exit condition is simple enough, and the number of iterations
2418 can be analyzed (a countable loop). */
2420 static loop_vec_info
2421 vect_analyze_loop_form (struct loop *loop)
2423 loop_vec_info loop_vinfo;
2425 tree number_of_iterations = NULL;
2427 if (vect_print_dump_info (REPORT_DETAILS))
2428 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
/* Reject nested loops: FORNOW only innermost loops are vectorized.  */
2432 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2433 fprintf (vect_dump, "not vectorized: nested loop.");
/* Structural checks: exactly one exit, exactly two basic blocks
   (header + latch), and exactly two incoming edges to the header.  */
2437 if (!single_exit (loop)
2438 || loop->num_nodes != 2
2439 || EDGE_COUNT (loop->header->preds) != 2)
2441 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2443 if (!single_exit (loop))
2444 fprintf (vect_dump, "not vectorized: multiple exits.");
2445 else if (loop->num_nodes != 2)
2446 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2447 else if (EDGE_COUNT (loop->header->preds) != 2)
2448 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2454 /* We assume that the loop exit condition is at the end of the loop. i.e,
2455 that the loop is represented as a do-while (with a proper if-guard
2456 before the loop if needed), where the loop header contains all the
2457 executable statements, and the latch is empty. */
2458 if (!empty_block_p (loop->latch)
2459 || phi_nodes (loop->latch))
2461 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2462 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2466 /* Make sure there exists a single-predecessor exit bb: */
2467 if (!single_pred_p (single_exit (loop)->dest))
2469 edge e = single_exit (loop);
/* A normal exit edge can be split to create the wanted bb; an abnormal
   edge cannot, so the loop is rejected in that case.  */
2470 if (!(e->flags & EDGE_ABNORMAL))
2472 split_loop_exit_edge (e);
2473 if (vect_print_dump_info (REPORT_DETAILS))
2474 fprintf (vect_dump, "split exit edge.");
2478 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2479 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2484 if (empty_block_p (loop->header))
2486 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2487 fprintf (vect_dump, "not vectorized: empty loop.");
/* Determine the exit condition and the number of iterations; the loop
   must be countable to be vectorizable.  */
2491 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2494 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2495 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2499 if (!number_of_iterations)
2501 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2503 "not vectorized: number of iterations cannot be computed.");
2507 if (chrec_contains_undetermined (number_of_iterations))
2509 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2510 fprintf (vect_dump, "Infinite number of iterations.");
/* All checks passed: build the loop_vec_info and record the results.  */
2514 loop_vinfo = new_loop_vec_info (loop);
2515 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2517 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2519 if (vect_print_dump_info (REPORT_DETAILS))
2521 fprintf (vect_dump, "Symbolic number of iterations is ");
2522 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
/* A compile-time constant iteration count of zero is useless to
   vectorize.  */
2526 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2528 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2529 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2533 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2539 /* Function vect_analyze_loop.
2541 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2542 for it. The different analyses will record information in the
2543 loop_vec_info struct. */
2545 vect_analyze_loop (struct loop *loop)
2548 loop_vec_info loop_vinfo;
2550 if (vect_print_dump_info (REPORT_DETAILS))
2551 fprintf (vect_dump, "===== analyze_loop_nest =====");
2553 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */
2555 loop_vinfo = vect_analyze_loop_form (loop);
2558 if (vect_print_dump_info (REPORT_DETAILS))
2559 fprintf (vect_dump, "bad loop form.");
2563 /* Find all data references in the loop (which correspond to vdefs/vuses)
2564 and analyze their evolution in the loop.
2566 FORNOW: Handle only simple, array references, which
2567 alignment can be forced, and aligned pointer-references. */
2569 ok = vect_analyze_data_refs (loop_vinfo);
2572 if (vect_print_dump_info (REPORT_DETAILS))
2573 fprintf (vect_dump, "bad data references.");
2574 destroy_loop_vec_info (loop_vinfo);
2578 /* Classify all cross-iteration scalar data-flow cycles.
2579 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2581 vect_analyze_scalar_cycles (loop_vinfo);
2583 vect_pattern_recog (loop_vinfo);
2585 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2587 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2590 if (vect_print_dump_info (REPORT_DETAILS))
2591 fprintf (vect_dump, "unexpected pattern.");
2592 destroy_loop_vec_info (loop_vinfo);
2596 /* Analyze the alignment of the data-refs in the loop.
2597 Fail if a data reference is found that cannot be vectorized. */
2599 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2602 if (vect_print_dump_info (REPORT_DETAILS))
2603 fprintf (vect_dump, "bad data alignment.");
2604 destroy_loop_vec_info (loop_vinfo);
2608 ok = vect_determine_vectorization_factor (loop_vinfo);
2611 if (vect_print_dump_info (REPORT_DETAILS))
2612 fprintf (vect_dump, "can't determine vectorization factor.");
2613 destroy_loop_vec_info (loop_vinfo);
2617 /* Analyze data dependences between the data-refs in the loop.
2618 FORNOW: fail at the first data dependence that we encounter. */
2620 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2623 if (vect_print_dump_info (REPORT_DETAILS))
2624 fprintf (vect_dump, "bad data dependence.");
2625 destroy_loop_vec_info (loop_vinfo);
2629 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2630 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2632 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2635 if (vect_print_dump_info (REPORT_DETAILS))
2636 fprintf (vect_dump, "bad data access.");
2637 destroy_loop_vec_info (loop_vinfo);
2641 /* This pass will decide on using loop versioning and/or loop peeling in
2642 order to enhance the alignment of data references in the loop. */
2644 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2647 if (vect_print_dump_info (REPORT_DETAILS))
2648 fprintf (vect_dump, "bad data alignment.");
2649 destroy_loop_vec_info (loop_vinfo);
2653 /* Scan all the operations in the loop and make sure they are
2656 ok = vect_analyze_operations (loop_vinfo);
2659 if (vect_print_dump_info (REPORT_DETAILS))
2660 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2661 destroy_loop_vec_info (loop_vinfo);
2665 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;