1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
28 #include "basic-block.h"
29 #include "diagnostic.h"
30 #include "tree-flow.h"
31 #include "tree-dump.h"
36 #include "tree-chrec.h"
37 #include "tree-data-ref.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
41 /* Main analysis functions. */
/* NOTE(review): this copy of the file is missing source lines throughout
   (the embedded line numbers jump).  Several prototypes below are visibly
   truncated mid-parameter-list (vect_analyze_offset_expr, vect_object_analysis,
   vect_address_analysis) — compare against a pristine tree-vect-analyze.c
   before editing.  */
42 static loop_vec_info vect_analyze_loop_form (struct loop *);
43 static bool vect_analyze_data_refs (loop_vec_info);
44 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
45 static void vect_analyze_scalar_cycles (loop_vec_info);
46 static bool vect_analyze_data_ref_accesses (loop_vec_info);
47 static bool vect_analyze_data_ref_dependences (loop_vec_info);
48 static bool vect_analyze_data_refs_alignment (loop_vec_info);
49 static bool vect_compute_data_refs_alignment (loop_vec_info);
50 static void vect_enhance_data_refs_alignment (loop_vec_info);
51 static bool vect_analyze_operations (loop_vec_info);
52 static bool vect_determine_vectorization_factor (loop_vec_info);
54 /* Utility functions for the analyses. */
55 static bool exist_non_indexing_operands_for_use_p (tree, tree);
56 static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
57 static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_reference *, struct data_reference *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static struct data_reference * vect_analyze_pointer_ref_access
64 (tree, tree, bool, tree, tree *, tree *);
65 static bool vect_can_advance_ivs_p (loop_vec_info);
66 static tree vect_get_ptr_offset (tree, tree, tree *);
67 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
69 static bool vect_base_addr_differ_p (struct data_reference *,
70 struct data_reference *drb, bool *);
71 static tree vect_object_analysis (tree, tree, bool, tree,
72 struct data_reference **, tree *, tree *,
73 tree *, bool *, tree *, struct ptr_info_def **,
75 static tree vect_address_analysis (tree, tree, bool, tree,
76 struct data_reference *, tree *, tree *,
80 /* Function vect_get_ptr_offset
82 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
/* Stub: all three parameters are marked ATTRIBUTE_UNUSED and no alignment
   information is consulted yet.  The function's return statement is not
   visible in this extraction — presumably it returns NULL_TREE (failure);
   TODO(review): confirm against a complete copy of the file.  */
85 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
86 tree vectype ATTRIBUTE_UNUSED,
87 tree *offset ATTRIBUTE_UNUSED)
89 /* TODO: Use alignment information. */
94 /* Function vect_analyze_offset_expr
/* NOTE(review): this body is incomplete in this extraction — braces,
   early `return false/true` statements, `else` keywords and the final
   statement are missing (the function is cut mid-expression at the end).
   Comments below annotate only what the visible lines establish.  */
96 Given an offset expression EXPR received from get_inner_reference, analyze
97 it and create an expression for INITIAL_OFFSET by substituting the variables
98 of EXPR with initial_condition of the corresponding access_fn in the loop.
101 for (j = 3; j < N; j++)
104 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
105 substituted, since its access_fn in the inner loop is i. 'j' will be
106 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
109 Compute MISALIGN (the misalignment of the data reference initial access from
110 its base) if possible. Misalignment can be calculated only if all the
111 variables can be substituted with constants, or if a variable is multiplied
112 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
113 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
114 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
115 VECTYPE_ALIGNMENT computation in the caller of this function).
117 STEP is an evolution of the data reference in this loop in bytes.
118 In the above example, STEP is C_j.
120 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
121 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
122 are NULL_TREEs. Otherwise, return TRUE.
127 vect_analyze_offset_expr (tree expr,
129 tree vectype_alignment,
130 tree *initial_offset,
/* Left/right accumulators for the recursive analysis of binary EXPRs;
   all start as signed-size zero.  */
136 tree left_offset = ssize_int (0);
137 tree right_offset = ssize_int (0);
138 tree left_misalign = ssize_int (0);
139 tree right_misalign = ssize_int (0);
140 tree left_step = ssize_int (0);
141 tree right_step = ssize_int (0);
143 tree init, evolution;
/* Outputs default to "unknown" until proven otherwise.  */
146 *misalign = NULL_TREE;
147 *initial_offset = NULL_TREE;
149 /* Strip conversions that don't narrow the mode. */
150 expr = vect_strip_conversion (expr);
/* Case 1: constant.  Offset and misalignment are the constant itself;
   a constant does not evolve, so STEP is zero.  */
156 if (TREE_CODE (expr) == INTEGER_CST)
158 *initial_offset = fold_convert (ssizetype, expr);
159 *misalign = fold_convert (ssizetype, expr);
160 *step = ssize_int (0);
164 /* 2. Variable. Try to substitute with initial_condition of the corresponding
165 access_fn in the current loop. */
166 if (SSA_VAR_P (expr))
168 tree access_fn = analyze_scalar_evolution (loop, expr);
170 if (access_fn == chrec_dont_know)
174 init = initial_condition_in_loop_num (access_fn, loop->num);
175 if (init == expr && !expr_invariant_in_loop_p (loop, init))
176 /* Not enough information: may be not loop invariant.
177 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
178 initial_condition is D, but it depends on i - loop's induction
182 evolution = evolution_part_in_loop_num (access_fn, loop->num);
183 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
184 /* Evolution is not constant. */
187 if (TREE_CODE (init) == INTEGER_CST)
188 *misalign = fold_convert (ssizetype, init);
190 /* Not constant, misalignment cannot be calculated. */
191 *misalign = NULL_TREE;
193 *initial_offset = fold_convert (ssizetype, init);
195 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
199 /* Recursive computation. */
200 if (!BINARY_CLASS_P (expr))
202 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
203 if (vect_print_dump_info (REPORT_DETAILS))
205 fprintf (vect_dump, "Not binary expression ");
206 print_generic_expr (vect_dump, expr, TDF_SLIM);
/* Case 3: binary expression — analyze both operands recursively.  */
210 oprnd0 = TREE_OPERAND (expr, 0);
211 oprnd1 = TREE_OPERAND (expr, 1);
213 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
214 &left_misalign, &left_step)
215 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
216 &right_offset, &right_misalign, &right_step))
219 /* The type of the operation: plus, minus or mult. */
220 code = TREE_CODE (expr);
/* For MULT (presumably — the guarding condition is not visible here),
   only a constant right operand is supported.  */
224 if (TREE_CODE (right_offset) != INTEGER_CST)
225 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
227 FORNOW: We don't support such cases. */
230 /* Strip conversions that don't narrow the mode. */
231 left_offset = vect_strip_conversion (left_offset);
234 /* Misalignment computation. */
235 if (SSA_VAR_P (left_offset))
237 /* If the left side contains variables that can't be substituted with
238 constants, we check if the right side is a multiple of ALIGNMENT.
240 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
241 fold_convert (ssizetype, vectype_alignment))))
242 *misalign = ssize_int (0);
244 /* If the remainder is not zero or the right side isn't constant,
245 we can't compute misalignment. */
246 *misalign = NULL_TREE;
250 /* The left operand was successfully substituted with constant. */
252 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
254 *misalign = size_binop (code, left_misalign, right_misalign);
256 *misalign = NULL_TREE;
259 /* Step calculation. */
260 /* Multiply the step by the right operand. */
261 *step = size_binop (MULT_EXPR, left_step, right_offset);
266 /* Combine the recursive calculations for step and misalignment. */
267 *step = size_binop (code, left_step, right_step);
269 if (left_misalign && right_misalign)
270 *misalign = size_binop (code, left_misalign, right_misalign);
272 *misalign = NULL_TREE;
280 /* Compute offset. */
281 *initial_offset = fold_convert (ssizetype,
282 fold_build2 (code, TREE_TYPE (left_offset),
289 /* Function vect_determine_vectorization_factor
/* NOTE(review): body incomplete in this extraction — braces, `continue`,
   `return false/true`, and some `else` arms are missing.  */
291 Determine the vectorization factor (VF). VF is the number of data elements
292 that are operated upon in parallel in a single iteration of the vectorized
293 loop. For example, when vectorizing a loop that operates on 4byte elements,
294 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
295 elements can fit in a single vector register.
297 We currently support vectorization of loops in which all types operated upon
298 are of the same size. Therefore this function currently sets VF according to
299 the size of the types operated upon, and fails if there are multiple sizes
302 VF is also the factor by which the loop iterations are strip-mined, e.g.:
309 for (i=0; i<N; i+=VF){
310 a[i:VF] = b[i:VF] + c[i:VF];
315 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
317 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
318 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
319 int nbbs = loop->num_nodes;
320 block_stmt_iterator si;
321 unsigned int vectorization_factor = 0;
325 if (vect_print_dump_info (REPORT_DETAILS))
326 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
/* Walk every statement of every basic block in the loop.  */
328 for (i = 0; i < nbbs; i++)
330 basic_block bb = bbs[i];
332 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
334 tree stmt = bsi_stmt (si);
336 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
339 if (vect_print_dump_info (REPORT_DETAILS))
341 fprintf (vect_dump, "==> examining statement: ");
342 print_generic_expr (vect_dump, stmt, TDF_SLIM);
345 gcc_assert (stmt_info);
346 /* skip stmts which do not need to be vectorized. */
347 if (!STMT_VINFO_RELEVANT_P (stmt_info)
348 && !STMT_VINFO_LIVE_P (stmt_info))
350 if (vect_print_dump_info (REPORT_DETAILS))
351 fprintf (vect_dump, "skip.");
/* A statement that is already of vector type cannot be handled.  */
355 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
357 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
359 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
360 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Pick the scalar type: from the data reference if there is one,
   else from the LHS of an assignment, else from the stmt itself.  */
365 if (STMT_VINFO_DATA_REF (stmt_info))
366 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
367 else if (TREE_CODE (stmt) == MODIFY_EXPR)
368 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
370 scalar_type = TREE_TYPE (stmt);
372 if (vect_print_dump_info (REPORT_DETAILS))
374 fprintf (vect_dump, "get vectype for scalar type: ");
375 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
378 vectype = get_vectype_for_scalar_type (scalar_type);
/* Failure path: no vector type exists for this scalar type
   (the guarding `if (!vectype)` is not visible in this extraction).  */
381 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
383 fprintf (vect_dump, "not vectorized: unsupported data-type ");
384 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
388 if (vect_print_dump_info (REPORT_DETAILS))
390 fprintf (vect_dump, "vectype: ");
391 print_generic_expr (vect_dump, vectype, TDF_SLIM);
393 STMT_VINFO_VECTYPE (stmt_info) = vectype;
395 nunits = TYPE_VECTOR_SUBPARTS (vectype);
396 if (vect_print_dump_info (REPORT_DETAILS))
397 fprintf (vect_dump, "nunits = %d", nunits);
399 if (vectorization_factor)
401 /* FORNOW: don't allow mixed units.
402 This restriction will be relaxed in the future. */
403 if (nunits != vectorization_factor)
405 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
406 fprintf (vect_dump, "not vectorized: mixed data-types");
/* First relevant statement seen: its unit count becomes the VF.  */
411 vectorization_factor = nunits;
413 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
414 * vectorization_factor == UNITS_PER_SIMD_WORD);
418 /* TODO: Analyze cost. Decide if worth while to vectorize. */
420 if (vectorization_factor <= 1)
422 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
423 fprintf (vect_dump, "not vectorized: unsupported data-type");
426 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
432 /* Function vect_analyze_operations.
/* NOTE(review): body incomplete in this extraction — braces, `continue`,
   `return false/true` and some `else`/`if` guards are missing.  */
434 Scan the loop stmts and make sure they are all vectorizable. */
437 vect_analyze_operations (loop_vec_info loop_vinfo)
439 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
440 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
441 int nbbs = loop->num_nodes;
442 block_stmt_iterator si;
443 unsigned int vectorization_factor = 0;
447 stmt_vec_info stmt_info;
448 bool need_to_vectorize = false;
450 if (vect_print_dump_info (REPORT_DETAILS))
451 fprintf (vect_dump, "=== vect_analyze_operations ===");
/* The VF must already have been computed by
   vect_determine_vectorization_factor.  */
453 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
454 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
456 for (i = 0; i < nbbs; i++)
458 basic_block bb = bbs[i];
/* First pass over this block: PHI nodes.  */
460 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
462 stmt_info = vinfo_for_stmt (phi);
463 if (vect_print_dump_info (REPORT_DETAILS))
465 fprintf (vect_dump, "examining phi: ");
466 print_generic_expr (vect_dump, phi, TDF_SLIM);
469 gcc_assert (stmt_info);
471 if (STMT_VINFO_LIVE_P (stmt_info))
473 /* FORNOW: not yet supported. */
474 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
475 fprintf (vect_dump, "not vectorized: value used after loop.");
479 if (STMT_VINFO_RELEVANT_P (stmt_info))
481 /* Most likely a reduction-like computation that is used
483 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
484 fprintf (vect_dump, "not vectorized: unsupported pattern.");
/* Second pass over this block: ordinary statements.  */
489 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
491 tree stmt = bsi_stmt (si);
492 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
494 if (vect_print_dump_info (REPORT_DETAILS))
496 fprintf (vect_dump, "==> examining statement: ");
497 print_generic_expr (vect_dump, stmt, TDF_SLIM);
500 gcc_assert (stmt_info);
502 /* skip stmts which do not need to be vectorized.
503 this is expected to include:
504 - the COND_EXPR which is the loop exit condition
505 - any LABEL_EXPRs in the loop
506 - computations that are used only for array indexing or loop
509 if (!STMT_VINFO_RELEVANT_P (stmt_info)
510 && !STMT_VINFO_LIVE_P (stmt_info))
512 if (vect_print_dump_info (REPORT_DETAILS))
513 fprintf (vect_dump, "irrelevant.");
517 if (STMT_VINFO_RELEVANT_P (stmt_info))
519 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
520 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
/* A relevant stmt is vectorizable if any of the per-kind
   checkers accepts it.  */
522 ok = (vectorizable_operation (stmt, NULL, NULL)
523 || vectorizable_assignment (stmt, NULL, NULL)
524 || vectorizable_load (stmt, NULL, NULL)
525 || vectorizable_store (stmt, NULL, NULL)
526 || vectorizable_condition (stmt, NULL, NULL));
530 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
533 "not vectorized: relevant stmt not supported: ");
534 print_generic_expr (vect_dump, stmt, TDF_SLIM);
538 need_to_vectorize = true;
541 if (STMT_VINFO_LIVE_P (stmt_info))
543 ok = vectorizable_reduction (stmt, NULL, NULL);
546 need_to_vectorize = true;
/* Not a reduction — fall back to the live-operation check
   (the guarding `if (!ok)` is not visible here).  */
548 ok = vectorizable_live_operation (stmt, NULL, NULL);
552 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
555 "not vectorized: live stmt not supported: ");
556 print_generic_expr (vect_dump, stmt, TDF_SLIM);
564 /* TODO: Analyze cost. Decide if worth while to vectorize. */
566 /* All operations in the loop are either irrelevant (deal with loop
567 control, or dead), or only used outside the loop and can be moved
568 out of the loop (e.g. invariants, inductions). The loop can be
569 optimized away by scalar optimizations. We're better off not
570 touching this loop. */
571 if (!need_to_vectorize)
573 if (vect_print_dump_info (REPORT_DETAILS))
575 "All the computation can be taken out of the loop.");
576 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
578 "not vectorized: redundant loop. no profit to vectorize.");
582 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
583 && vect_print_dump_info (REPORT_DETAILS))
585 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
586 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
/* Fewer known iterations than the VF: not a single vector iteration.  */
588 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
589 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
591 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
592 fprintf (vect_dump, "not vectorized: iteration count too small.");
/* Unknown or non-VF-divisible trip count requires an epilog loop,
   which in turn requires advanceable IVs and a duplicable loop.  */
596 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
597 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
599 if (vect_print_dump_info (REPORT_DETAILS))
600 fprintf (vect_dump, "epilog loop required.");
601 if (!vect_can_advance_ivs_p (loop_vinfo))
603 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
605 "not vectorized: can't create epilog loop 1.");
608 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
610 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
612 "not vectorized: can't create epilog loop 2.");
621 /* Function exist_non_indexing_operands_for_use_p
623 USE is one of the uses attached to STMT. Check if USE is
624 used in STMT for anything other than indexing an array. */
/* NOTE(review): several lines (returns, the final comparison against USE)
   are missing from this extraction.  */
627 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
630 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
632 /* USE corresponds to some operand in STMT. If there is no data
633 reference in STMT, then any operand that corresponds to USE
634 is not indexing an array. */
635 if (!STMT_VINFO_DATA_REF (stmt_info))
638 /* STMT has a data_ref. FORNOW this means that its of one of
642 (This should have been verified in analyze_data_refs).
644 'var' in the second case corresponds to a def, not a use,
645 so USE cannot correspond to any operands that are not used
648 Therefore, all we need to check is if STMT falls into the
649 first case, and whether var corresponds to USE. */
/* An SSA_NAME on the LHS means STMT is a load ('var = array_ref'),
   so the RHS is the memory reference, not a scalar use.  */
651 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
654 operand = TREE_OPERAND (stmt, 1);
656 if (TREE_CODE (operand) != SSA_NAME)
666 /* Function vect_analyze_scalar_cycles.
668 Examine the cross iteration def-use cycles of scalar variables, by
669 analyzing the loop (scalar) PHIs; Classify each cycle as one of the
670 following: invariant, induction, reduction, unknown.
672 Some forms of scalar cycles are not yet supported.
674 Example1: reduction: (unsupported yet)
680 Example2: induction: (unsupported yet)
686 Note: the following loop *is* vectorizable:
692 even though it has a def-use cycle caused by the induction variable i:
694 loop: i_2 = PHI (i_0, i_1)
699 because the def-use cycle in loop3 is considered "not relevant" - i.e.,
700 it does not need to be vectorized because it is only used for array
701 indexing (see 'mark_stmts_to_be_vectorized'). The def-use cycle in
702 loop2 on the other hand is relevant (it is being written to memory).
/* NOTE(review): body incomplete in this extraction — braces, `continue`
   statements and some guards (e.g. around the reduction case) are missing.  */
706 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
709 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
710 basic_block bb = loop->header;
713 if (vect_print_dump_info (REPORT_DETAILS))
714 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
/* Only PHIs in the loop header can carry cross-iteration cycles.  */
716 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
718 tree access_fn = NULL;
719 tree def = PHI_RESULT (phi);
720 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
723 if (vect_print_dump_info (REPORT_DETAILS))
725 fprintf (vect_dump, "Analyze phi: ");
726 print_generic_expr (vect_dump, phi, TDF_SLIM);
729 /* Skip virtual phi's. The data dependences that are associated with
730 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
732 if (!is_gimple_reg (SSA_NAME_VAR (def)))
734 if (vect_print_dump_info (REPORT_DETAILS))
735 fprintf (vect_dump, "virtual phi. skip.")
739 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_unknown_def_type;
741 /* Analyze the evolution function. */
743 access_fn = analyze_scalar_evolution (loop, def);
748 if (vect_print_dump_info (REPORT_DETAILS))
750 fprintf (vect_dump, "Access function of PHI: ");
751 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
754 if (vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
756 if (vect_print_dump_info (REPORT_DETAILS))
757 fprintf (vect_dump, "Detected induction.");
758 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_induction_def;
762 /* TODO: handle invariant phis */
764 reduc_stmt = vect_is_simple_reduction (loop, phi);
/* Reduction detected (the guarding `if (reduc_stmt)` is not visible
   here): classify both the PHI and the reduction stmt.  */
767 if (vect_print_dump_info (REPORT_DETAILS))
768 fprintf (vect_dump, "Detected reduction.");
769 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
770 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) =
774 if (vect_print_dump_info (REPORT_DETAILS))
775 fprintf (vect_dump, "Unknown def-use cycle pattern.");
783 /* Function vect_base_addr_differ_p.
785 This is the simplest data dependence test: determines whether the
786 data references A and B access the same array/region. Returns
787 false when the property is not computable at compile time.
788 Otherwise return true, and DIFFER_P will record the result. This
789 utility will not be necessary when alias_sets_conflict_p will be
790 less conservative. */
/* NOTE(review): body incomplete in this extraction — the assignments to
   *differ_p and the return statements are missing.  */
793 vect_base_addr_differ_p (struct data_reference *dra,
794 struct data_reference *drb,
797 tree stmt_a = DR_STMT (dra);
798 stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
799 tree stmt_b = DR_STMT (drb);
800 stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
801 tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
802 tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
803 tree type_a = TREE_TYPE (addr_a);
804 tree type_b = TREE_TYPE (addr_b);
805 HOST_WIDE_INT alias_set_a, alias_set_b;
807 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
809 /* Both references are ADDR_EXPR, i.e., we have the objects. */
810 if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
811 return array_base_name_differ_p (dra, drb, differ_p);
/* Otherwise fall back to alias-set reasoning: take the alias set of
   the pointed-to object when we have it, of the pointer otherwise.  */
813 alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
814 get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
815 alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
816 get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);
818 if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
824 /* An instruction writing through a restricted pointer is "independent" of any
825 instruction reading or writing through a different pointer, in the same
827 else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
828 || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
837 /* Function vect_analyze_data_ref_dependence.
839 Return TRUE if there (might) exist a dependence between a memory-reference
840 DRA and a memory-reference DRB. */
/* NOTE(review): body incomplete in this extraction — braces and the
   `return true/false` statements on the various paths are missing.  */
843 vect_analyze_data_ref_dependence (struct data_reference *dra,
844 struct data_reference *drb,
845 loop_vec_info loop_vinfo)
848 struct data_dependence_relation *ddr;
849 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
850 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
852 unsigned int loop_depth = 0;
853 struct loop *loop_nest = loop;
854 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
855 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
/* If we cannot even decide whether the bases differ, give up.  */
857 if (!vect_base_addr_differ_p (dra, drb, &differ_p))
859 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
862 "not vectorized: can't determine dependence between: ");
863 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
864 fprintf (vect_dump, " and ");
865 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
/* Run the affine dependence analyzer on the pair.  */
873 ddr = initialize_data_dependence_relation (dra, drb);
874 compute_affine_dependence (ddr);
876 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
879 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
881 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
884 "not vectorized: can't determine dependence between ");
885 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
886 fprintf (vect_dump, " and ");
887 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
892 /* Find loop depth. */
/* Walk out to (one below) the outermost loop to establish the nest
   for the classic distance-vector computation.  */
895 if (loop_nest->outer && loop_nest->outer->outer)
897 loop_nest = loop_nest->outer;
904 /* Compute distance vector. */
905 compute_subscript_distance (ddr);
906 build_classic_dist_vector (ddr, vect_loops_num, loop_nest->depth);
908 if (!DDR_DIST_VECT (ddr))
910 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
912 fprintf (vect_dump, "not vectorized: bad dist vector for ");
913 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
914 fprintf (vect_dump, " and ");
915 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
920 dist = DDR_DIST_VECT (ddr)[loop_depth];
922 /* Same loop iteration. */
923 if (dist % vectorization_factor == 0)
925 /* Two references with distance zero have the same alignment. */
926 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
927 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
928 if (vect_print_dump_info (REPORT_ALIGNMENT))
929 fprintf (vect_dump, "accesses have the same alignment.");
933 if (dist >= vectorization_factor)
934 /* Dependence distance does not create dependence, as far as vectorization
935 is concerned, in this case. */
/* Remaining case (0 < dist < VF): a real vectorization-blocking
   dependence — report it.  */
938 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
941 "not vectorized: possible dependence between data-refs ");
942 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
943 fprintf (vect_dump, " and ");
944 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
951 /* Function vect_analyze_data_ref_dependences.
953 Examine all the data references in the loop, and make sure there do not
954 exist any data dependences between them. */
/* NOTE(review): braces and the `return false/true` statements are missing
   from this extraction; from the visible structure the function returns
   false as soon as any pair is (possibly) dependent — TODO confirm.  */
957 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
960 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
961 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
963 /* Examine store-store (output) dependences. */
965 if (vect_print_dump_info (REPORT_DETAILS))
966 fprintf (vect_dump, "=== vect_analyze_dependences ===");
968 if (vect_print_dump_info (REPORT_DETAILS))
969 fprintf (vect_dump, "compare all store-store pairs.");
/* All unordered write/write pairs (j starts at i+1).  */
971 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
973 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
975 struct data_reference *dra =
976 VARRAY_GENERIC_PTR (loop_write_refs, i);
977 struct data_reference *drb =
978 VARRAY_GENERIC_PTR (loop_write_refs, j);
979 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
984 /* Examine load-store (true/anti) dependences. */
986 if (vect_print_dump_info (REPORT_DETAILS))
987 fprintf (vect_dump, "compare all load-store pairs.");
/* Full read x write cross product.  */
989 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
991 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
993 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
994 struct data_reference *drb =
995 VARRAY_GENERIC_PTR (loop_write_refs, j);
996 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
1005 /* Function vect_compute_data_ref_alignment
1007 Compute the misalignment of the data reference DR.
1010 1. If during the misalignment computation it is found that the data reference
1011 cannot be vectorized then false is returned.
1012 2. DR_MISALIGNMENT (DR) is defined.
1014 FOR NOW: No analysis is actually performed. Misalignment is calculated
1015 only for trivial cases. TODO. */
/* NOTE(review): body incomplete in this extraction — braces, `return`
   statements, and the guard before the "Unknown alignment" message
   (presumably `if (!misalign)`) are missing.  */
1018 vect_compute_data_ref_alignment (struct data_reference *dr)
1020 tree stmt = DR_STMT (dr);
1021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1022 tree ref = DR_REF (dr);
1024 tree base, alignment;
1025 bool base_aligned_p;
1028 if (vect_print_dump_info (REPORT_DETAILS))
1029 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
1031 /* Initialize misalignment to unknown. */
1032 DR_MISALIGNMENT (dr) = -1;
/* Pull the precomputed per-stmt alignment facts; BASE is the object
   behind the data-ref's base address.  */
1034 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
1035 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
1036 base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
1037 vectype = STMT_VINFO_VECTYPE (stmt_info);
1041 if (vect_print_dump_info (REPORT_DETAILS))
1043 fprintf (vect_dump, "Unknown alignment for access: ");
1044 print_generic_expr (vect_dump, base, TDF_SLIM);
1049 if (!base_aligned_p)
1051 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
1053 if (vect_print_dump_info (REPORT_DETAILS))
1055 fprintf (vect_dump, "can't force alignment of ref: ");
1056 print_generic_expr (vect_dump, ref, TDF_SLIM);
1061 /* Force the alignment of the decl.
1062 NOTE: This is the only change to the code we make during
1063 the analysis phase, before deciding to vectorize the loop. */
1064 if (vect_print_dump_info (REPORT_DETAILS))
1065 fprintf (vect_dump, "force alignment");
1066 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
1067 DECL_USER_ALIGN (base) = 1;
1070 /* At this point we assume that the base is aligned. */
1071 gcc_assert (base_aligned_p
1072 || (TREE_CODE (base) == VAR_DECL
1073 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
1075 /* Alignment required, in bytes: */
1076 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
1078 /* Modulo alignment. */
1079 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
1080 if (tree_int_cst_sgn (misalign) < 0)
1082 /* Negative misalignment value. */
1083 if (vect_print_dump_info (REPORT_DETAILS))
1084 fprintf (vect_dump, "unexpected misalign value");
1088 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
1090 if (vect_print_dump_info (REPORT_DETAILS))
1091 fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));
1097 /* Function vect_compute_data_refs_alignment
1099 Compute the misalignment of data references in the loop.
1100 This pass may take place at function granularity instead of at loop
1103 FOR NOW: No analysis is actually performed. Misalignment is calculated
1104 only for trivial cases. TODO. */
/* NOTE(review): braces and `return false/true` are missing from this
   extraction; visibly this just applies vect_compute_data_ref_alignment
   to every write and then every read data-ref, failing on first error.  */
1107 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
1109 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1110 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1113 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1115 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1116 if (!vect_compute_data_ref_alignment (dr))
1120 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1122 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1123 if (!vect_compute_data_ref_alignment (dr))
1131 /* Function vect_enhance_data_refs_alignment
1133 This pass will use loop versioning and loop peeling in order to enhance
1134 the alignment of data references in the loop.
1136 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1137 original loop is to be vectorized; Any other loops that are created by
1138 the transformations performed in this pass - are not supposed to be
1139 vectorized. This restriction will be relaxed. */
1142 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1144 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1145 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1146 varray_type datarefs;
1147 VEC(dr_p,heap) *same_align_drs;
1148 struct data_reference *dr0 = NULL;
1149 struct data_reference *dr;
1153 This pass will require a cost model to guide it whether to apply peeling
1154 or versioning or a combination of the two. For example, the scheme that
1155 Intel uses when given a loop with several memory accesses, is as follows:
1156 choose one memory access ('p') which alignment you want to force by doing
1157 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1158 other accesses are not necessarily aligned, or (2) use loop versioning to
1159 generate one loop in which all accesses are aligned, and another loop in
1160 which only 'p' is necessarily aligned.
1162 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1163 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1164 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1166 Devising a cost model is the most critical aspect of this work. It will
1167 guide us on which access to peel for, whether to use loop versioning, how
1168 many versions to create, etc. The cost model will probably consist of
1169 generic considerations as well as target specific considerations (on
1170 powerpc for example, misaligned stores are more painful than misaligned
1173 Here are the general steps involved in alignment enhancements:
1175 -- original loop, before alignment analysis:
1176 for (i=0; i<N; i++){
1177 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1178 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1181 -- After vect_compute_data_refs_alignment:
1182 for (i=0; i<N; i++){
1183 x = q[i]; # DR_MISALIGNMENT(q) = 3
1184 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1187 -- Possibility 1: we do loop versioning:
1189 for (i=0; i<N; i++){ # loop 1A
1190 x = q[i]; # DR_MISALIGNMENT(q) = 3
1191 p[i] = y; # DR_MISALIGNMENT(p) = 0
1195 for (i=0; i<N; i++){ # loop 1B
1196 x = q[i]; # DR_MISALIGNMENT(q) = 3
1197 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1201 -- Possibility 2: we do loop peeling:
1202 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1206 for (i = 3; i < N; i++){ # loop 2A
1207 x = q[i]; # DR_MISALIGNMENT(q) = 0
1208 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1211 -- Possibility 3: combination of loop peeling and versioning:
1212 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1217 for (i = 3; i<N; i++){ # loop 3A
1218 x = q[i]; # DR_MISALIGNMENT(q) = 0
1219 p[i] = y; # DR_MISALIGNMENT(p) = 0
1223 for (i = 3; i<N; i++){ # loop 3B
1224 x = q[i]; # DR_MISALIGNMENT(q) = 0
1225 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1229 These loops are later passed to loop_transform to be vectorized. The
1230 vectorizer will use the alignment information to guide the transformation
1231 (whether to generate regular loads/stores, or with special handling for
1235 /* (1) Peeling to force alignment. */
1237 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1239 + How many accesses will become aligned due to the peeling
1240 - How many accesses will become unaligned due to the peeling,
1241 and the cost of misaligned accesses.
1242 - The cost of peeling (the extra runtime checks, the increase
1245 The scheme we use FORNOW: peel to force the alignment of the first
1246 misaligned store in the loop.
1247 Rationale: misaligned stores are not yet supported.
1249 TODO: Use a better cost model. */
1251 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1253 dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1254 if (!aligned_access_p (dr0))
1256 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1257 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1262 /* (1.2) Update the alignment info according to the peeling factor.
1263 If the misalignment of the DR we peel for is M, then the
1264 peeling factor is VF - M, and the misalignment of each access DR_i
1265 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
1266 If the misalignment of the DR we peel for is unknown, then the
1267 misalignment of each access DR_i in the loop is also unknown.
1269 TODO: - consider accesses that are known to have the same
1270 alignment, even if that alignment is unknown. */
1272 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1277 if (known_alignment_for_access_p (dr0))
1279 /* Since it's known at compile time, compute the number of iterations
1280 in the peeled loop (the peeling factor) for use in updating
1281 DR_MISALIGNMENT values. The peeling factor is the vectorization
1282 factor minus the misalignment as an element count. */
1283 mis = DR_MISALIGNMENT (dr0);
1284 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1285 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
/* The j-loop below runs twice: first over the write datarefs, then over
   the read datarefs (DATAREFS is switched to the reads at the bottom).  */
1288 datarefs = loop_write_datarefs;
1289 for (j = 0; j < 2; j++)
1291 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1293 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
/* A DR with the same known misalignment as DR0 becomes aligned
   once we peel for DR0.  */
1297 if (known_alignment_for_access_p (dr)
1298 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
1299 DR_MISALIGNMENT (dr) = 0;
1300 else if (known_alignment_for_access_p (dr)
1301 && known_alignment_for_access_p (dr0))
1304 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1306 DR_MISALIGNMENT (dr) += npeel * drsize;
1307 DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
/* Otherwise the misalignment after peeling cannot be determined.  */
1310 DR_MISALIGNMENT (dr) = -1;
1312 datarefs = loop_read_datarefs;
/* DRs recorded as having the same alignment as DR0 (even if that
   alignment is unknown) also become aligned after peeling.  */
1316 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr0)));
1317 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, dr); i++)
1319 DR_MISALIGNMENT (dr) = 0;
/* The access we peeled for is itself now aligned.  */
1322 DR_MISALIGNMENT (dr0) = 0;
1327 /* Function vect_analyze_data_refs_alignment
1329 Analyze the alignment of the data-references in the loop.
1330 FOR NOW: Until support for misaligned accesses is in place, only if all
1331 accesses are aligned can the loop be vectorized. This restriction will be
1335 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1337 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1338 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1339 enum dr_alignment_support supportable_dr_alignment;
1342 if (vect_print_dump_info (REPORT_DETAILS))
1343 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1346 /* This pass may take place at function granularity instead of at loop
/* Step 1: compute the (mis)alignment of every data reference; fail the
   loop if any alignment cannot be computed.  */
1349 if (!vect_compute_data_refs_alignment (loop_vinfo))
1351 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1353 "not vectorized: can't calculate alignment for data ref.")
1358 /* This pass will decide on using loop versioning and/or loop peeling in
1359 order to enhance the alignment of data references in the loop. */
1361 vect_enhance_data_refs_alignment (loop_vinfo);
1364 /* Finally, check that all the data references in the loop can be
1365 handled with respect to their alignment. */
1367 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1369 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1370 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1371 if (!supportable_dr_alignment)
1373 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1374 fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
1377 if (supportable_dr_alignment != dr_aligned
1378 && (vect_print_dump_info (REPORT_ALIGNMENT)))
1379 fprintf (vect_dump, "Vectorizing an unaligned access.");
/* Same check for the write datarefs.  */
1381 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1383 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1384 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1385 if (!supportable_dr_alignment)
1387 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1388 fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
1391 if (supportable_dr_alignment != dr_aligned
1392 && (vect_print_dump_info (REPORT_ALIGNMENT)))
1393 fprintf (vect_dump, "Vectorizing an unaligned access.");
/* Report when peeling was used to force alignment of some access.  */
1395 if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
1396 && vect_print_dump_info (REPORT_ALIGNMENT))
1397 fprintf (vect_dump, "Alignment of access forced using peeling.");
1403 /* Function vect_analyze_data_ref_access.
1405 Analyze the access pattern of the data-reference DR. For now, a data access
1406 has to be consecutive to be considered vectorizable. */
1409 vect_analyze_data_ref_access (struct data_reference *dr)
1411 tree stmt = DR_STMT (dr);
1412 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1413 tree step = STMT_VINFO_VECT_STEP (stmt_info);
1414 tree scalar_type = TREE_TYPE (DR_REF (dr));
/* Consecutive means the per-iteration step equals the size of the scalar
   element type; a missing or different step is rejected.  */
1416 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1418 if (vect_print_dump_info (REPORT_DETAILS))
1419 fprintf (vect_dump, "not consecutive access");
1426 /* Function vect_analyze_data_ref_accesses.
1428 Analyze the access pattern of all the data references in the loop.
1430 FORNOW: the only access pattern that is considered vectorizable is a
1431 simple step 1 (consecutive) access.
1433 FORNOW: handle only arrays and pointer accesses. */
1436 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1439 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1440 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1442 if (vect_print_dump_info (REPORT_DETAILS))
1443 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
/* Check the access pattern of every write dataref.  */
1445 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1447 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1448 bool ok = vect_analyze_data_ref_access (dr);
1451 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1452 fprintf (vect_dump, "not vectorized: complicated access pattern.");
/* Likewise for every read dataref.  */
1457 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1459 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1460 bool ok = vect_analyze_data_ref_access (dr);
1463 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1464 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1473 /* Function vect_analyze_pointer_ref_access.
1476 STMT - a stmt that contains a data-ref.
1477 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
1478 ACCESS_FN - the access function of MEMREF.
1481 If the data-ref access is vectorizable, return a data_reference structure
1482 that represents it (DR). Otherwise - return NULL.
1483 STEP - the stride of MEMREF in the loop.
1484 INIT - the initial condition of MEMREF in the loop.
1487 static struct data_reference *
1488 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
1489 tree access_fn, tree *ptr_init, tree *ptr_step)
1491 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1492 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1493 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1495 tree reftype, innertype;
1496 tree indx_access_fn;
1497 int loopnum = loop->num;
1498 struct data_reference *dr;
/* The pointer must evolve as a simple induction: INIT + i * STEP.  */
1500 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
1502 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1503 fprintf (vect_dump, "not vectorized: pointer access is not simple.");
/* The initial condition must not vary inside the loop.  */
1509 if (!expr_invariant_in_loop_p (loop, init))
1511 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1513 "not vectorized: initial condition is not loop invariant.");
/* Only compile-time-constant strides are handled.  */
1517 if (TREE_CODE (step) != INTEGER_CST)
1519 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1521 "not vectorized: non constant step for pointer access.");
1525 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
1526 if (!POINTER_TYPE_P (reftype))
1528 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1529 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
1533 if (!POINTER_TYPE_P (TREE_TYPE (init)))
1535 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1536 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
1540 *ptr_step = fold_convert (ssizetype, step);
1541 innertype = TREE_TYPE (reftype);
1542 if (!COMPLETE_TYPE_P (innertype))
1544 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1545 fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
1549 /* Check that STEP is a multiple of type size. */
1550 if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
1551 fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype)))))
1553 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1554 fprintf (vect_dump, "not vectorized: non consecutive access.");
/* Model the access with a unit-stride index chrec {0, +, 1}_loop and
   build the data_reference from it.  */
1559 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
1560 if (vect_print_dump_info (REPORT_DETAILS))
1562 fprintf (vect_dump, "Access function of ptr indx: ");
1563 print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
1565 dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
1571 /* Function vect_address_analysis
1573 Return the BASE of the address expression EXPR.
1574 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1577 EXPR - the address expression that is being analyzed
1578 STMT - the statement that contains EXPR or its original memory reference
1579 IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
1580 VECTYPE - the type that defines the alignment (i.e, we compute
1581 alignment relative to TYPE_ALIGN(VECTYPE))
1582 DR - data_reference struct for the original memory reference
1585 BASE (returned value) - the base of the data reference EXPR.
1586 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1587 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1588 computation is impossible
1589 STEP - evolution of EXPR in the loop
1590 BASE_ALIGNED - indicates if BASE is aligned
1592 If something unexpected is encountered (an unsupported form of data-ref),
1593 then NULL_TREE is returned.
1597 vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
1598 struct data_reference *dr, tree *offset, tree *misalign,
1599 tree *step, bool *base_aligned)
1601 tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
1602 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1604 struct ptr_info_def *dummy1;
1607 switch (TREE_CODE (expr))
1611 /* EXPR is of form {base +/- offset} (or {offset +/- base}). */
1612 oprnd0 = TREE_OPERAND (expr, 0);
1613 oprnd1 = TREE_OPERAND (expr, 1);
1615 STRIP_NOPS (oprnd0);
1616 STRIP_NOPS (oprnd1);
1618 /* Recursively try to find the base of the address contained in EXPR.
1619 For offset, the returned base will be NULL. */
1620 base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
1621 &address_offset, &address_misalign, step,
1624 base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
1625 &address_offset, &address_misalign, step,
1628 /* We support cases where only one of the operands contains an
1630 if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
1633 /* To revert STRIP_NOPS. */
1634 oprnd0 = TREE_OPERAND (expr, 0);
1635 oprnd1 = TREE_OPERAND (expr, 1);
/* The non-base operand is the offset part of the address.  */
1637 offset_expr = base_addr0 ?
1638 fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);
1640 /* EXPR is of form {base +/- offset} (or {offset +/- base}). If offset is
1641 a number, we can add it to the misalignment value calculated for base,
1642 otherwise, misalignment is NULL. */
1643 if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
1644 *misalign = size_binop (TREE_CODE (expr), address_misalign,
1647 *misalign = NULL_TREE;
1649 /* Combine offset (from EXPR {base + offset}) with the offset calculated
1651 *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
1652 return base_addr0 ? base_addr0 : base_addr1;
/* ADDR_EXPR case: analyze the object whose address is taken.  */
1655 base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
1656 is_read, vectype, &dr, offset,
1657 misalign, step, base_aligned,
1658 &dummy, &dummy1, &dummy2);
1659 return base_address;
/* SSA_NAME case: a pointer value; alignment is derived from the
   alignment of the pointed-to type relative to VECTYPE.  */
1662 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1665 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1667 if (vect_get_ptr_offset (expr, vectype, misalign))
1668 *base_aligned = true;
1670 *base_aligned = false;
1674 *base_aligned = true;
1675 *misalign = ssize_int (0);
1677 *offset = ssize_int (0);
1678 *step = ssize_int (0);
1687 /* Function vect_object_analysis
1689 Return the BASE of the data reference MEMREF.
1690 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1691 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1692 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1693 instantiated with initial_conditions of access_functions of variables,
1694 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1696 Function get_inner_reference is used for the above in case of ARRAY_REF and
1699 The structure of the function is as follows:
1701 Case 1. For handled_component_p refs
1702 1.1 call get_inner_reference
1703 1.1.1 analyze offset expr received from get_inner_reference
1704 1.2. build data-reference structure for MEMREF
1705 (fall through with BASE)
1706 Case 2. For declarations
1708 2.2 update DR_BASE_NAME if necessary for alias
1709 Case 3. For INDIRECT_REFs
1710 3.1 get the access function
1711 3.2 analyze evolution of MEMREF
1712 3.3 set data-reference structure for MEMREF
1713 3.4 call vect_address_analysis to analyze INIT of the access function
1716 Combine the results of object and address analysis to calculate
1717 INITIAL_OFFSET, STEP and misalignment info.
1720 MEMREF - the memory reference that is being analyzed
1721 STMT - the statement that contains MEMREF
1722 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
1723 VECTYPE - the type that defines the alignment (i.e, we compute
1724 alignment relative to TYPE_ALIGN(VECTYPE))
1727 BASE_ADDRESS (returned value) - the base address of the data reference MEMREF
1728 E.g, if MEMREF is a.b[k].c[i][j] the returned
1730 DR - data_reference struct for MEMREF
1731 INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
1732 MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
1733 the computation is impossible
1734 STEP - evolution of the DR_REF in the loop
1735 BASE_ALIGNED - indicates if BASE is aligned
1736 MEMTAG - memory tag for aliasing purposes
1737 PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
1738 SUBVAR - Sub-variables of the variable
1740 If something unexpected is encountered (an unsupported form of data-ref),
1741 then NULL_TREE is returned. */
1744 vect_object_analysis (tree memref, tree stmt, bool is_read,
1745 tree vectype, struct data_reference **dr,
1746 tree *offset, tree *misalign, tree *step,
1747 bool *base_aligned, tree *memtag,
1748 struct ptr_info_def **ptr_info, subvar_t *subvars)
1750 tree base = NULL_TREE, base_address = NULL_TREE;
1751 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1752 tree object_step = ssize_int (0), address_step = ssize_int (0);
1753 bool object_base_aligned = true, address_base_aligned = true;
1754 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1755 HOST_WIDE_INT pbitsize, pbitpos;
1756 tree poffset, bit_pos_in_bytes;
1757 enum machine_mode pmode;
1758 int punsignedp, pvolatilep;
1759 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1760 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1761 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1762 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1763 struct data_reference *ptr_dr = NULL;
1764 tree access_fn, evolution_part, address_to_analyze;
1769 /* Case 1. handled_component_p refs. */
1770 if (handled_component_p (memref))
1772 /* 1.1 call get_inner_reference. */
1773 /* Find the base and the offset from it. */
1774 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1775 &pmode, &punsignedp, &pvolatilep, false);
1779 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1781 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1782 &object_offset, &object_misalign, &object_step))
1784 if (vect_print_dump_info (REPORT_DETAILS))
1786 fprintf (vect_dump, "failed to compute offset or step for ");
1787 print_generic_expr (vect_dump, memref, TDF_SLIM);
1792 /* Add bit position to OFFSET and MISALIGN. */
1794 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1795 /* Check that there is no remainder in bits. */
1796 if (pbitpos%BITS_PER_UNIT)
1798 if (vect_print_dump_info (REPORT_DETAILS))
1799 fprintf (vect_dump, "bit offset alignment.");
1802 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1803 if (object_misalign)
1804 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1807 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1810 if (TREE_CODE (memref) == ARRAY_REF)
1811 *dr = analyze_array (stmt, memref, is_read);
1816 memref = base; /* To continue analysis of BASE. */
1820 /* Part 1: Case 2. Declarations. */
1821 if (DECL_P (memref))
1823 /* We expect to get a decl only if we already have a DR. */
1826 if (vect_print_dump_info (REPORT_DETAILS))
1828 fprintf (vect_dump, "unhandled decl ");
1829 print_generic_expr (vect_dump, memref, TDF_SLIM);
1834 /* 2.1 check the alignment. */
1835 if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
1836 object_base_aligned = true;
1838 object_base_aligned = false;
1840 /* 2.2 update DR_BASE_NAME if necessary. */
1841 if (!DR_BASE_NAME ((*dr)))
1842 /* For alias analysis. In case the analysis of INDIRECT_REF brought
1844 DR_BASE_NAME ((*dr)) = memref;
1846 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1847 *subvars = get_subvars_for_var (memref);
1848 base_address = build_fold_addr_expr (memref);
1852 /* Part 1: Case 3. INDIRECT_REFs. */
1853 else if (TREE_CODE (memref) == INDIRECT_REF)
1855 tree ptr_ref = TREE_OPERAND (memref, 0);
1856 if (TREE_CODE (ptr_ref) == SSA_NAME)
1857 *ptr_info = SSA_NAME_PTR_INFO (ptr_ref);
1859 /* 3.1 get the access function. */
1860 access_fn = analyze_scalar_evolution (loop, ptr_ref);
1863 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1864 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1867 if (vect_print_dump_info (REPORT_DETAILS))
1869 fprintf (vect_dump, "Access function of ptr: ");
1870 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1873 /* 3.2 analyze evolution of MEMREF. */
1874 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1877 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1878 access_fn, &ptr_init, &ptr_step);
/* The pointer's step contributes to the overall step of the DR.  */
1882 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1883 address_to_analyze = ptr_init;
1889 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1890 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1893 /* Since there exists DR for MEMREF, we are analyzing the init of
1894 the access function, which not necessary has evolution in the
1896 address_to_analyze = initial_condition_in_loop_num (access_fn,
1900 /* 3.3 set data-reference structure for MEMREF. */
1901 *dr = (*dr) ? *dr : ptr_dr;
1903 /* 3.4 call vect_address_analysis to analyze INIT of the access
1905 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1906 vectype, *dr, &address_offset, &address_misalign,
1907 &address_step, &address_base_aligned);
/* Determine the memory tag for aliasing, based on the form of the
   base address that was found.  */
1911 switch (TREE_CODE (base_address))
1914 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1915 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1916 *memtag = get_var_ann (
1917 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1920 *memtag = TREE_OPERAND (base_address, 0);
1923 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1925 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1926 print_generic_expr (vect_dump, memref, TDF_SLIM);
1933 /* MEMREF cannot be analyzed. */
1936 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1937 *subvars = get_subvars_for_var (*memtag);
1939 /* Part 2: Combine the results of object and address analysis to calculate
1940 INITIAL_OFFSET, STEP and misalignment info. */
1941 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1942 if (object_misalign && address_misalign)
1943 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1945 *misalign = NULL_TREE;
1946 *step = size_binop (PLUS_EXPR, object_step, address_step);
1947 *base_aligned = object_base_aligned && address_base_aligned;
1949 if (vect_print_dump_info (REPORT_DETAILS))
1951 fprintf (vect_dump, "Results of object analysis for: ");
1952 print_generic_expr (vect_dump, memref, TDF_SLIM);
1953 fprintf (vect_dump, "\n\tbase_address: ");
1954 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1955 fprintf (vect_dump, "\n\toffset: ");
1956 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1957 fprintf (vect_dump, "\n\tstep: ");
1958 print_generic_expr (vect_dump, *step, TDF_SLIM);
1959 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1960 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1962 return base_address;
1966 /* Function vect_analyze_data_refs.
1968 Find all the data references in the loop.
1970 The general structure of the analysis of data refs in the vectorizer is as
1972 1- vect_analyze_data_refs(loop):
1973 Find and analyze all data-refs in the loop:
1975 base_address = vect_object_analysis(ref)
1976 1.1- vect_object_analysis(ref):
1977 Analyze ref, and build a DR (data_reference struct) for it;
1978 compute base, initial_offset, step and alignment.
1979 Call get_inner_reference for refs handled in this function.
1980 Call vect_addr_analysis(addr) to analyze pointer type expressions.
1981 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1982 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
1983 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
1984 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1985 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1987 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1988 which base is really an array (not a pointer) and which alignment
1989 can be forced. This restriction will be relaxed. */
1992 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1994 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1995 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1996 int nbbs = loop->num_nodes;
1997 block_stmt_iterator si;
1999 struct data_reference *dr;
2001 if (vect_print_dump_info (REPORT_DETAILS))
2002 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
/* Walk every statement in every basic block of the loop.  */
2004 for (j = 0; j < nbbs; j++)
2006 basic_block bb = bbs[j];
2007 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2009 bool is_read = false;
2010 tree stmt = bsi_stmt (si);
2011 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2012 varray_type *datarefs = NULL;
2014 tree scalar_type, vectype;
2015 tree base, offset, misalign, step, tag;
2016 struct ptr_info_def *ptr_info;
2018 subvar_t subvars = NULL;
2019 bool no_vuse, no_vmaymust;
2021 /* Assumption: there exists a data-ref in stmt, if and only if
2022 it has vuses/vdefs. */
2024 no_vuse = ZERO_SSA_OPERANDS (stmt, SSA_OP_VUSE);
2025 no_vmaymust = ZERO_SSA_OPERANDS (stmt,
2026 SSA_OP_VMAYDEF | SSA_OP_VMUSTDEF);
2027 if (no_vuse && no_vmaymust)
/* A stmt with both vuses and vdefs is not a simple load or store;
   such statements are not handled.  */
2030 if (!no_vuse && !no_vmaymust)
2032 if (vect_print_dump_info (REPORT_DETAILS))
2034 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
2035 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2040 if (TREE_CODE (stmt) != MODIFY_EXPR)
2042 if (vect_print_dump_info (REPORT_DETAILS))
2044 fprintf (vect_dump, "unexpected vops in stmt: ");
2045 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Load: the data-ref is on the RHS (operand 1) of the MODIFY_EXPR.  */
2052 memref = TREE_OPERAND (stmt, 1);
2053 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
/* Store: the data-ref is on the LHS (operand 0).  */
2058 memref = TREE_OPERAND (stmt, 0);
2059 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
2063 scalar_type = TREE_TYPE (memref);
2064 vectype = get_vectype_for_scalar_type (scalar_type);
2067 if (vect_print_dump_info (REPORT_DETAILS))
2069 fprintf (vect_dump, "no vectype for stmt: ");
2070 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2071 fprintf (vect_dump, " scalar_type: ");
2072 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2074 /* It is not possible to vectorize this data reference. */
2077 /* Analyze MEMREF. If it is of a supported form, build data_reference
2078 struct for it (DR). */
2080 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
2081 &offset, &misalign, &step,
2082 &base_aligned, &tag, &ptr_info,
2086 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2088 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
2089 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Record everything the analysis produced in the stmt_vec_info and
   push the DR onto the appropriate (read or write) varray.  */
2093 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
2094 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2095 STMT_VINFO_VECT_STEP (stmt_info) = step;
2096 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
2097 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
2098 STMT_VINFO_MEMTAG (stmt_info) = tag;
2099 STMT_VINFO_PTR_INFO (stmt_info) = ptr_info;
2100 STMT_VINFO_SUBVARS (stmt_info) = subvars;
2101 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2102 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
2103 STMT_VINFO_DATA_REF (stmt_info) = dr;
2111 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
2113 /* Function vect_mark_relevant.
2115 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2118 vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
2119 bool relevant_p, bool live_p)
2121 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2122 bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
2123 bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
2125 if (vect_print_dump_info (REPORT_DETAILS))
2126 fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
/* OR the new flags into the stmt's existing relevant/live flags.  */
2128 STMT_VINFO_LIVE_P (stmt_info) |= live_p;
2129 STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
2131 if (TREE_CODE (stmt) == PHI_NODE)
2132 /* Don't put phi-nodes in the worklist. Phis that are marked relevant
2133 or live will fail vectorization later on. */
/* If neither flag changed, there is nothing new to propagate, so STMT
   need not be (re)added to the worklist.  */
2136 if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
2137 && STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
2139 if (vect_print_dump_info (REPORT_DETAILS))
2140 fprintf (vect_dump, "already marked relevant/live.");
2144 VEC_safe_push (tree, heap, *worklist, stmt);
2148 /* Function vect_stmt_relevant_p.
2150 Return true if STMT in loop that is represented by LOOP_VINFO is
2151 "relevant for vectorization".
2153 A stmt is considered "relevant for vectorization" if:
2154 - it has uses outside the loop.
2155 - it has vdefs (it alters memory).
2156 - control stmts in the loop (except for the exit condition).
2158 CHECKME: what other side effects would the vectorizer allow? */
2161 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
2162 bool *relevant_p, bool *live_p)
2164 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2165 ssa_op_iter op_iter;
2166 imm_use_iterator imm_iter;
2167 use_operand_p use_p;
2168 def_operand_p def_p;
2170 *relevant_p = false;
2173 /* cond stmt other than loop exit cond. */
2174 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2177 /* changing memory. */
2178 if (TREE_CODE (stmt) != PHI_NODE)
2179 if (!ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
2181 if (vect_print_dump_info (REPORT_DETAILS))
2182 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2186 /* uses outside the loop. */
2187 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF)
2189 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, DEF_FROM_PTR (def_p))
2191 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2192 if (!flow_bb_inside_loop_p (loop, bb))
2194 if (vect_print_dump_info (REPORT_DETAILS))
2195 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2197 /* We expect all such uses to be in the loop exit phis
2198 (because of loop closed form) */
2199 gcc_assert (TREE_CODE (USE_STMT (use_p)) == PHI_NODE);
2200 gcc_assert (bb == loop->single_exit->dest);
/* STMT is relevant/live if either output flag was set above.  */
2207 return (*live_p || *relevant_p);
2211 /* Function vect_mark_stmts_to_be_vectorized.
2213 Not all stmts in the loop need to be vectorized. For example:
2222 Stmt 1 and 3 do not need to be vectorized, because loop control and
2223 addressing of vectorized data-refs are handled differently.
2225 This pass detects such stmts. */
2228 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2230 VEC(tree,heap) *worklist;
2231 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2232 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2233 unsigned int nbbs = loop->num_nodes;
2234 block_stmt_iterator si;
2239 stmt_vec_info stmt_vinfo;
2242 bool relevant_p, live_p;
2244 enum vect_def_type dt;
2246 if (vect_print_dump_info (REPORT_DETAILS))
2247 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
/* Worklist of stmts whose relevance/liveness still has to be propagated
   to the definition stmts of their uses.  */
2249 worklist = VEC_alloc (tree, heap, 64);
2251 /* 1. Init worklist.  Seed it with the loop phis and stmts that
   vect_stmt_relevant_p classifies as relevant and/or live.  */
2254 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2256 if (vect_print_dump_info (REPORT_DETAILS))
2258 fprintf (vect_dump, "init: phi relevant? ");
2259 print_generic_expr (vect_dump, phi, TDF_SLIM);
2262 if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
2263 vect_mark_relevant (&worklist, phi, relevant_p, live_p);
2266 for (i = 0; i < nbbs; i++)
2269 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2271 stmt = bsi_stmt (si);
2273 if (vect_print_dump_info (REPORT_DETAILS))
2275 fprintf (vect_dump, "init: stmt relevant? ");
2276 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2279 if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
2280 vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
2285 /* 2. Process_worklist */
2287 while (VEC_length (tree, worklist) > 0)
2289 stmt = VEC_pop (tree, worklist);
2291 if (vect_print_dump_info (REPORT_DETAILS))
2293 fprintf (vect_dump, "worklist: examine stmt: ");
2294 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2297 /* Examine the USEs of STMT. For each ssa-name USE that is defined
2298 in the loop, mark the stmt that defines it (DEF_STMT) as
2299 relevant/irrelevant and live/dead according to the liveness and
2300 relevance properties of STMT.
/* Only phis are entered in the worklist via the seeding loop above;
   worklist entries popped here are expected to be non-phi stmts.  */
2303 gcc_assert (TREE_CODE (stmt) != PHI_NODE);
2305 ann = stmt_ann (stmt);
2306 stmt_vinfo = vinfo_for_stmt (stmt);
2308 relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
2309 live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
2311 /* Generally, the liveness and relevance properties of STMT are
2312 propagated to the DEF_STMTs of its USEs:
2313 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
2314 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
2319 If USE is used only for address computations (e.g. array indexing),
2320 which does not need to be directly vectorized, then the
2321 liveness/relevance of the respective DEF_STMT is left unchanged.
2324 If STMT has been identified as defining a reduction variable, then
2327 The last use of STMT is the reduction-variable, which is defined
2328 by a loop-header-phi. We don't want to mark the phi as live or
2329 relevant (because it does not need to be vectorized, it is handled
2330 as part of the vectorization of the reduction), so in this case we
2331 skip the call to vect_mark_relevant.
2333 The rest of the uses of STMT are defined in the loop body. For
2334 the def_stmt of these uses we want to set liveness/relevance
2336 STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
2337 STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
2338 because even though STMT is classified as live (since it defines a
2339 value that is used across loop iterations) and irrelevant (since it
2340 is not used inside the loop), it will be vectorized, and therefore
2341 the corresponding DEF_STMTs need to be marked as relevant.
2345 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
2347 gcc_assert (!relevant_p && live_p);
2352 FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
2354 /* case 1: we are only interested in uses that need to be vectorized.
2355 Uses that are used for address computation are not considered
2358 if (!exist_non_indexing_operands_for_use_p (use, stmt))
2361 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt, &def, &dt))
2363 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2364 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
/* Failure path: release the worklist before bailing out.  */
2365 VEC_free (tree, heap, worklist);
2369 if (!def_stmt || IS_EMPTY_STMT (def_stmt))
2372 if (vect_print_dump_info (REPORT_DETAILS))
2374 fprintf (vect_dump, "worklist: examine use %d: ", i);
2375 print_generic_expr (vect_dump, use, TDF_SLIM);
/* Only defs inside the loop propagate relevance; defs outside the
   loop are loop invariants as far as this analysis is concerned.  */
2378 bb = bb_for_stmt (def_stmt);
2379 if (!flow_bb_inside_loop_p (loop, bb))
2382 /* case 2.1: the reduction-use does not mark the defining-phi
2384 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2385 && TREE_CODE (def_stmt) == PHI_NODE)
2388 vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
2390 } /* while worklist */
2392 VEC_free (tree, heap, worklist);
2397 /* Function vect_can_advance_ivs_p
2399 In case the number of iterations that LOOP iterates is unknown at compile
2400 time, an epilog loop will be generated, and the loop induction variables
2401 (IVs) will be "advanced" to the value they are supposed to take just before
2402 the epilog loop. Here we check that the access function of the loop IVs
2403 and the expression that represents the loop bound are simple enough.
2404 These restrictions will be relaxed in the future. */
2407 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2409 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2410 basic_block bb = loop->header;
2413 /* Analyze phi functions of the loop header. */
2415 if (vect_print_dump_info (REPORT_DETAILS))
2416 fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
2418 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2420 tree access_fn = NULL;
2421 tree evolution_part;
2423 if (vect_print_dump_info (REPORT_DETAILS))
2425 fprintf (vect_dump, "Analyze phi: ");
2426 print_generic_expr (vect_dump, phi, TDF_SLIM);
2429 /* Skip virtual phi's. The data dependences that are associated with
2430 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2432 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2434 if (vect_print_dump_info (REPORT_DETAILS))
2435 fprintf (vect_dump, "virtual phi. skip.");
2439 /* Skip reduction phis.  Reductions are handled by their own
   vectorization scheme, not by IV advancement.  */
2441 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2443 if (vect_print_dump_info (REPORT_DETAILS))
2444 fprintf (vect_dump, "reduc phi. skip.");
2448 /* Analyze the evolution function.  Uses the scalar-evolution (scev)
   framework to describe how the phi result changes across iterations.  */
2450 access_fn = instantiate_parameters
2451 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2455 if (vect_print_dump_info (REPORT_DETAILS))
2456 fprintf (vect_dump, "No Access function.");
2460 if (vect_print_dump_info (REPORT_DETAILS))
2462 fprintf (vect_dump, "Access function of PHI: ");
2463 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2466 evolution_part = evolution_part_in_loop_num (access_fn, loop->num)
2468 if (evolution_part == NULL_TREE)
2470 if (vect_print_dump_info (REPORT_DETAILS))
2471 fprintf (vect_dump, "No evolution.");
2475 /* FORNOW: We do not transform initial conditions of IVs
2476 which evolution functions are a polynomial of degree >= 2. */
2478 if (tree_is_chrec (evolution_part))
2486 /* Function vect_get_loop_niters.
2488 Determine how many iterations the loop is executed.
2489 If an expression that represents the number of iterations
2490 can be constructed, place it in NUMBER_OF_ITERATIONS.
2491 Return the loop exit condition. */
2494 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2498 if (vect_print_dump_info (REPORT_DETAILS))
2499 fprintf (vect_dump, "=== get_loop_niters ===");
2501 niters = number_of_iterations_in_loop (loop);
/* Only record the iteration count when the analysis produced a usable
   expression; chrec_dont_know means it could not be determined.  */
2503 if (niters != NULL_TREE
2504 && niters != chrec_dont_know)
2506 *number_of_iterations = niters;
2508 if (vect_print_dump_info (REPORT_DETAILS))
2510 fprintf (vect_dump, "==> get_loop_niters:" );
2511 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
/* The caller also needs the controlling exit condition of the loop.  */
2515 return get_loop_exit_condition (loop);
2519 /* Function vect_analyze_loop_form.
2521 Verify the following restrictions (some may be relaxed in the future):
2522 - it's an inner-most loop
2523 - number of BBs = 2 (which are the loop header and the latch)
2524 - the loop has a pre-header
2525 - the loop has a single entry and exit
2526 - the loop exit condition is simple enough, and the number of iterations
2527 can be analyzed (a countable loop). */
2529 static loop_vec_info
2530 vect_analyze_loop_form (struct loop *loop)
2532 loop_vec_info loop_vinfo;
2534 tree number_of_iterations = NULL;
2536 if (vect_print_dump_info (REPORT_DETAILS))
2537 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2541 if (vect_print_dump_info (REPORT_OUTER_LOOPS))
2542 fprintf (vect_dump, "not vectorized: nested loop.");
/* Structural checks: single exit, exactly two blocks (header + latch),
   and exactly two incoming header edges (preheader + latch back edge).  */
2546 if (!loop->single_exit
2547 || loop->num_nodes != 2
2548 || EDGE_COUNT (loop->header->preds) != 2)
2550 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2552 if (!loop->single_exit)
2553 fprintf (vect_dump, "not vectorized: multiple exits.");
2554 else if (loop->num_nodes != 2)
2555 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2556 else if (EDGE_COUNT (loop->header->preds) != 2)
2557 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2563 /* We assume that the loop exit condition is at the end of the loop. i.e,
2564 that the loop is represented as a do-while (with a proper if-guard
2565 before the loop if needed), where the loop header contains all the
2566 executable statements, and the latch is empty. */
2567 if (!empty_block_p (loop->latch))
2569 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2570 fprintf (vect_dump, "not vectorized: unexpected loop form.");
2574 /* Make sure there exists a single-predecessor exit bb: */
2575 if (!single_pred_p (loop->single_exit->dest))
2577 edge e = loop->single_exit;
/* A normal exit edge can be split to create the needed exit bb;
   an abnormal one cannot, so the loop is rejected below.  */
2578 if (!(e->flags & EDGE_ABNORMAL))
2580 split_loop_exit_edge (e);
2581 if (vect_print_dump_info (REPORT_DETAILS))
2582 fprintf (vect_dump, "split exit edge.");
2586 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2587 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2592 if (empty_block_p (loop->header))
2594 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2595 fprintf (vect_dump, "not vectorized: empty loop.");
/* Determine the exit condition and the number of iterations; both are
   required for the loop to be considered countable.  */
2599 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2602 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2603 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2607 if (!number_of_iterations)
2609 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2611 "not vectorized: number of iterations cannot be computed.");
2615 if (chrec_contains_undetermined (number_of_iterations))
2617 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS))
2618 fprintf (vect_dump, "Infinite number of iterations.");
/* All checks passed: build the loop_vec_info and record the results.  */
2622 loop_vinfo = new_loop_vec_info (loop);
2623 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2625 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2627 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "Symbolic number of iterations is ");
2630 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2634 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2636 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
2637 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2641 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2647 /* Function vect_analyze_loop.
2649 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2650 for it. The different analyses will record information in the
2651 loop_vec_info struct. */
2653 vect_analyze_loop (struct loop *loop)
2656 loop_vec_info loop_vinfo;
2658 if (vect_print_dump_info (REPORT_DETAILS))
2659 fprintf (vect_dump, "===== analyze_loop_nest =====");
2661 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
2663 loop_vinfo = vect_analyze_loop_form (loop);
2666 if (vect_print_dump_info (REPORT_DETAILS))
2667 fprintf (vect_dump, "bad loop form.");
2671 /* Find all data references in the loop (which correspond to vdefs/vuses)
2672 and analyze their evolution in the loop.
2674 FORNOW: Handle only simple, array references, which
2675 alignment can be forced, and aligned pointer-references. */
2677 ok = vect_analyze_data_refs (loop_vinfo);
2680 if (vect_print_dump_info (REPORT_DETAILS))
2681 fprintf (vect_dump, "bad data references.");
2682 destroy_loop_vec_info (loop_vinfo);
2686 /* Classify all cross-iteration scalar data-flow cycles.
2687 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2689 vect_analyze_scalar_cycles (loop_vinfo);
2691 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2693 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2696 if (vect_print_dump_info (REPORT_DETAILS))
2697 fprintf (vect_dump, "unexpected pattern.");
2698 destroy_loop_vec_info (loop_vinfo);
/* Pick the vectorization factor (VF) before the dependence/access
   analyses below, which may consult it.  */
2702 ok = vect_determine_vectorization_factor (loop_vinfo);
2705 if (vect_print_dump_info (REPORT_DETAILS))
2706 fprintf (vect_dump, "can't determine vectorization factor.");
2707 destroy_loop_vec_info (loop_vinfo);
2711 /* Analyze data dependences between the data-refs in the loop.
2712 FORNOW: fail at the first data dependence that we encounter. */
2714 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2717 if (vect_print_dump_info (REPORT_DETAILS))
2718 fprintf (vect_dump, "bad data dependence.");
2719 destroy_loop_vec_info (loop_vinfo);
2723 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2724 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2726 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2729 if (vect_print_dump_info (REPORT_DETAILS))
2730 fprintf (vect_dump, "bad data access.");
2731 destroy_loop_vec_info (loop_vinfo);
2735 /* Analyze the alignment of the data-refs in the loop.
2736 FORNOW: Only aligned accesses are handled. */
2738 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2741 if (vect_print_dump_info (REPORT_DETAILS))
2742 fprintf (vect_dump, "bad data alignment.");
2743 destroy_loop_vec_info (loop_vinfo);
2747 /* Scan all the operations in the loop and make sure they are
2750 ok = vect_analyze_operations (loop_vinfo);
2753 if (vect_print_dump_info (REPORT_DETAILS))
2754 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2755 destroy_loop_vec_info (loop_vinfo);
/* All analyses succeeded: mark the loop as vectorizable.  */
2759 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;