1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
42 /* Main analysis functions. */
43 static loop_vec_info vect_analyze_loop_form (struct loop *);
44 static bool vect_analyze_data_refs (loop_vec_info);
45 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
46 static bool vect_analyze_scalar_cycles (loop_vec_info);
47 static bool vect_analyze_data_ref_accesses (loop_vec_info);
48 static bool vect_analyze_data_ref_dependences (loop_vec_info);
49 static bool vect_analyze_data_refs_alignment (loop_vec_info);
50 static bool vect_compute_data_refs_alignment (loop_vec_info);
51 static void vect_enhance_data_refs_alignment (loop_vec_info);
52 static bool vect_analyze_operations (loop_vec_info);
53 static bool vect_determine_vectorization_factor (loop_vec_info);
55 /* Utility functions for the analyses. */
56 static bool exist_non_indexing_operands_for_use_p (tree, tree);
57 static void vect_mark_relevant (varray_type *, tree);
58 static bool vect_stmt_relevant_p (tree, loop_vec_info);
59 static tree vect_get_loop_niters (struct loop *, tree *);
60 static bool vect_analyze_data_ref_dependence
61 (struct data_reference *, struct data_reference *, loop_vec_info);
62 static bool vect_compute_data_ref_alignment (struct data_reference *);
63 static bool vect_analyze_data_ref_access (struct data_reference *);
64 static struct data_reference * vect_analyze_pointer_ref_access
65 (tree, tree, bool, tree, tree *, tree *);
66 static bool vect_can_advance_ivs_p (loop_vec_info);
67 static tree vect_get_ptr_offset (tree, tree, tree *);
68 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
70 static bool vect_base_addr_differ_p (struct data_reference *,
71 struct data_reference *drb, bool *);
72 static tree vect_object_analysis (tree, tree, bool, tree,
73 struct data_reference **, tree *, tree *,
74 tree *, bool *, tree *, struct ptr_info_def **,
76 static tree vect_address_analysis (tree, tree, bool, tree,
77 struct data_reference *, tree *, tree *,
81 /* Function vect_get_ptr_offset
83 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
86 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
87 tree vectype ATTRIBUTE_UNUSED,
88 tree *offset ATTRIBUTE_UNUSED)
90 /* TODO: Use alignment information. */
95 /* Function vect_analyze_offset_expr
97 Given an offset expression EXPR received from get_inner_reference, analyze
98 it and create an expression for INITIAL_OFFSET by substituting the variables
99 of EXPR with initial_condition of the corresponding access_fn in the loop.
102 for (j = 3; j < N; j++)
105 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
106 substituted, since its access_fn in the inner loop is i. 'j' will be
107 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
110 Compute MISALIGN (the misalignment of the data reference initial access from
111 its base) if possible. Misalignment can be calculated only if all the
112 variables can be substituted with constants, or if a variable is multiplied
113 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
114 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
115 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
116 VECTYPE_ALIGNMENT computation in the caller of this function).
118 STEP is an evolution of the data reference in this loop in bytes.
119 In the above example, STEP is C_j.
121 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
122 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
123 are NULL_TREEs. Otherwise, return TRUE.
128 vect_analyze_offset_expr (tree expr,
130 tree vectype_alignment,
131 tree *initial_offset,
137 tree left_offset = ssize_int (0);
138 tree right_offset = ssize_int (0);
139 tree left_misalign = ssize_int (0);
140 tree right_misalign = ssize_int (0);
141 tree left_step = ssize_int (0);
142 tree right_step = ssize_int (0);
144 tree init, evolution;
147 *misalign = NULL_TREE;
148 *initial_offset = NULL_TREE;
150 /* Strip conversions that don't narrow the mode. */
151 expr = vect_strip_conversion (expr);
157 if (TREE_CODE (expr) == INTEGER_CST)
159 *initial_offset = fold_convert (ssizetype, expr);
160 *misalign = fold_convert (ssizetype, expr);
161 *step = ssize_int (0);
165 /* 2. Variable. Try to substitute with initial_condition of the corresponding
166 access_fn in the current loop. */
167 if (SSA_VAR_P (expr))
169 tree access_fn = analyze_scalar_evolution (loop, expr);
171 if (access_fn == chrec_dont_know)
175 init = initial_condition_in_loop_num (access_fn, loop->num);
176 if (init == expr && !expr_invariant_in_loop_p (loop, init))
177 /* Not enough information: may be not loop invariant.
178 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
179 initial_condition is D, but it depends on i - loop's induction
183 evolution = evolution_part_in_loop_num (access_fn, loop->num);
184 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
185 /* Evolution is not constant. */
188 if (TREE_CODE (init) == INTEGER_CST)
189 *misalign = fold_convert (ssizetype, init);
191 /* Not constant, misalignment cannot be calculated. */
192 *misalign = NULL_TREE;
194 *initial_offset = fold_convert (ssizetype, init);
196 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
200 /* Recursive computation. */
201 if (!BINARY_CLASS_P (expr))
203 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
204 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
206 fprintf (vect_dump, "Not binary expression ");
207 print_generic_expr (vect_dump, expr, TDF_SLIM);
211 oprnd0 = TREE_OPERAND (expr, 0);
212 oprnd1 = TREE_OPERAND (expr, 1);
214 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
215 &left_misalign, &left_step)
216 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
217 &right_offset, &right_misalign, &right_step))
220 /* The type of the operation: plus, minus or mult. */
221 code = TREE_CODE (expr);
225 if (TREE_CODE (right_offset) != INTEGER_CST)
226 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
228 FORNOW: We don't support such cases. */
231 /* Strip conversions that don't narrow the mode. */
232 left_offset = vect_strip_conversion (left_offset);
235 /* Misalignment computation. */
236 if (SSA_VAR_P (left_offset))
238 /* If the left side contains variables that can't be substituted with
239 constants, we check if the right side is a multiple of ALIGNMENT.
241 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
242 fold_convert (ssizetype, vectype_alignment))))
243 *misalign = ssize_int (0);
245 /* If the remainder is not zero or the right side isn't constant,
246 we can't compute misalignment. */
247 *misalign = NULL_TREE;
251 /* The left operand was successfully substituted with constant. */
253 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
255 *misalign = size_binop (code, left_misalign, right_misalign);
257 *misalign = NULL_TREE;
260 /* Step calculation. */
261 /* Multiply the step by the right operand. */
262 *step = size_binop (MULT_EXPR, left_step, right_offset);
267 /* Combine the recursive calculations for step and misalignment. */
268 *step = size_binop (code, left_step, right_step);
270 if (left_misalign && right_misalign)
271 *misalign = size_binop (code, left_misalign, right_misalign);
273 *misalign = NULL_TREE;
281 /* Compute offset. */
282 *initial_offset = fold_convert (ssizetype,
283 fold (build2 (code, TREE_TYPE (left_offset),
290 /* Function vect_determine_vectorization_factor
292 Determine the vectorization factor (VF). VF is the number of data elements
293 that are operated upon in parallel in a single iteration of the vectorized
294 loop. For example, when vectorizing a loop that operates on 4byte elements,
295 on a target with vector size (VS) 16byte, the VF is set to 4, since 4
296 elements can fit in a single vector register.
298 We currently support vectorization of loops in which all types operated upon
299 are of the same size. Therefore this function currently sets VF according to
300 the size of the types operated upon, and fails if there are multiple sizes
303 VF is also the factor by which the loop iterations are strip-mined, e.g.:
310 for (i=0; i<N; i+=VF){
311 a[i:VF] = b[i:VF] + c[i:VF];
316 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
318 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
319 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
320 int nbbs = loop->num_nodes;
321 block_stmt_iterator si;
322 unsigned int vectorization_factor = 0;
326 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
327 fprintf (vect_dump, "=== vect_determine_vectorization_factor ===");
329 for (i = 0; i < nbbs; i++)
331 basic_block bb = bbs[i];
333 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
335 tree stmt = bsi_stmt (si);
337 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
340 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
342 fprintf (vect_dump, "==> examining statement: ");
343 print_generic_expr (vect_dump, stmt, TDF_SLIM);
346 gcc_assert (stmt_info);
347 /* skip stmts which do not need to be vectorized. */
348 if (!STMT_VINFO_RELEVANT_P (stmt_info))
351 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
353 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
354 LOOP_LOC (loop_vinfo)))
356 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
357 print_generic_expr (vect_dump, stmt, TDF_SLIM);
362 if (STMT_VINFO_DATA_REF (stmt_info))
363 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
364 else if (TREE_CODE (stmt) == MODIFY_EXPR)
365 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
367 scalar_type = TREE_TYPE (stmt);
369 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
371 fprintf (vect_dump, "get vectype for scalar type: ");
372 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
375 vectype = get_vectype_for_scalar_type (scalar_type);
378 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
379 LOOP_LOC (loop_vinfo)))
381 fprintf (vect_dump, "not vectorized: unsupported data-type ");
382 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
386 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
388 fprintf (vect_dump, "vectype: ");
389 print_generic_expr (vect_dump, vectype, TDF_SLIM);
391 STMT_VINFO_VECTYPE (stmt_info) = vectype;
393 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
394 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
395 fprintf (vect_dump, "nunits = %d", nunits);
397 if (vectorization_factor)
399 /* FORNOW: don't allow mixed units.
400 This restriction will be relaxed in the future. */
401 if (nunits != vectorization_factor)
403 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
404 LOOP_LOC (loop_vinfo)))
405 fprintf (vect_dump, "not vectorized: mixed data-types");
410 vectorization_factor = nunits;
412 #ifdef ENABLE_CHECKING
413 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
414 * vectorization_factor == UNITS_PER_SIMD_WORD);
419 /* TODO: Analyze cost. Decide if worth while to vectorize. */
421 if (vectorization_factor <= 1)
423 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
424 LOOP_LOC (loop_vinfo)))
425 fprintf (vect_dump, "not vectorized: unsupported data-type");
428 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
434 /* Function vect_analyze_operations.
436 Scan the loop stmts and make sure they are all vectorizable. */
439 vect_analyze_operations (loop_vec_info loop_vinfo)
441 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
442 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
443 int nbbs = loop->num_nodes;
444 block_stmt_iterator si;
445 unsigned int vectorization_factor = 0;
449 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
450 fprintf (vect_dump, "=== vect_analyze_operations ===");
452 gcc_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
453 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
455 for (i = 0; i < nbbs; i++)
457 basic_block bb = bbs[i];
459 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
461 tree stmt = bsi_stmt (si);
462 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
464 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
466 fprintf (vect_dump, "==> examining statement: ");
467 print_generic_expr (vect_dump, stmt, TDF_SLIM);
470 gcc_assert (stmt_info);
472 /* skip stmts which do not need to be vectorized.
473 this is expected to include:
474 - the COND_EXPR which is the loop exit condition
475 - any LABEL_EXPRs in the loop
476 - computations that are used only for array indexing or loop
479 if (!STMT_VINFO_RELEVANT_P (stmt_info))
481 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
482 fprintf (vect_dump, "irrelevant.");
486 #ifdef ENABLE_CHECKING
487 if (STMT_VINFO_RELEVANT_P (stmt_info))
489 gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
490 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
494 ok = (vectorizable_operation (stmt, NULL, NULL)
495 || vectorizable_assignment (stmt, NULL, NULL)
496 || vectorizable_load (stmt, NULL, NULL)
497 || vectorizable_store (stmt, NULL, NULL));
501 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
502 LOOP_LOC (loop_vinfo)))
504 fprintf (vect_dump, "not vectorized: stmt not supported: ");
505 print_generic_expr (vect_dump, stmt, TDF_SLIM);
512 /* TODO: Analyze cost. Decide if worth while to vectorize. */
514 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
515 && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
517 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
518 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
520 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
521 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
523 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
524 LOOP_LOC (loop_vinfo)))
525 fprintf (vect_dump, "not vectorized: iteration count too small.");
529 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
530 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
532 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
533 fprintf (vect_dump, "epilog loop required.");
534 if (!vect_can_advance_ivs_p (loop_vinfo))
536 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
537 LOOP_LOC (loop_vinfo)))
539 "not vectorized: can't create epilog loop 1.");
542 if (!slpeel_can_duplicate_loop_p (loop, loop->single_exit))
544 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
545 LOOP_LOC (loop_vinfo)))
547 "not vectorized: can't create epilog loop 2.");
556 /* Function exist_non_indexing_operands_for_use_p
558 USE is one of the uses attached to STMT. Check if USE is
559 used in STMT for anything other than indexing an array. */
562 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
565 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
567 /* USE corresponds to some operand in STMT. If there is no data
568 reference in STMT, then any operand that corresponds to USE
569 is not indexing an array. */
570 if (!STMT_VINFO_DATA_REF (stmt_info))
573 /* STMT has a data_ref. FORNOW this means that its of one of
577 (This should have been verified in analyze_data_refs).
579 'var' in the second case corresponds to a def, not a use,
580 so USE cannot correspond to any operands that are not used
583 Therefore, all we need to check is if STMT falls into the
584 first case, and whether var corresponds to USE. */
586 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
589 operand = TREE_OPERAND (stmt, 1);
591 if (TREE_CODE (operand) != SSA_NAME)
601 /* Function vect_analyze_scalar_cycles.
603 Examine the cross iteration def-use cycles of scalar variables, by
604 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
605 cycles that they represent do not impede vectorization.
607 FORNOW: Reduction as in the following loop, is not supported yet:
611 The cross-iteration cycle corresponding to variable 'sum' will be
612 considered too complicated and will impede vectorization.
614 FORNOW: Induction as in the following loop, is not supported yet:
619 However, the following loop *is* vectorizable:
624 In both loops there exists a def-use cycle for the variable i:
625 loop: i_2 = PHI (i_0, i_1)
630 The evolution of the above cycle is considered simple enough,
631 however, we also check that the cycle does not need to be
632 vectorized, i.e - we check that the variable that this cycle
633 defines is only used for array indexing or in stmts that do not
634 need to be vectorized. This is not the case in loop2, but it
635 *is* the case in loop3. */
638 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
641 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
642 basic_block bb = loop->header;
645 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
646 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
648 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
650 tree access_fn = NULL;
652 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
654 fprintf (vect_dump, "Analyze phi: ");
655 print_generic_expr (vect_dump, phi, TDF_SLIM);
658 /* Skip virtual phi's. The data dependences that are associated with
659 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
661 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
663 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
664 fprintf (vect_dump, "virtual phi. skip.");
668 /* Analyze the evolution function. */
670 /* FORNOW: The only scalar cross-iteration cycles that we allow are
671 those of loop induction variables; This property is verified here.
673 Furthermore, if that induction variable is used in an operation
674 that needs to be vectorized (i.e, is not solely used to index
675 arrays and check the exit condition) - we do not support its
676 vectorization yet. This property is verified in vect_is_simple_use,
677 during vect_analyze_operations. */
679 access_fn = /* instantiate_parameters
681 analyze_scalar_evolution (loop, PHI_RESULT (phi));
685 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
686 LOOP_LOC (loop_vinfo)))
687 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
691 if (vect_print_dump_info (REPORT_DETAILS,
692 LOOP_LOC (loop_vinfo)))
694 fprintf (vect_dump, "Access function of PHI: ");
695 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
698 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
700 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
701 LOOP_LOC (loop_vinfo)))
702 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
711 /* Function vect_base_addr_differ_p.
713 This is the simplest data dependence test: determines whether the
714 data references A and B access the same array/region. Returns
715 false when the property is not computable at compile time.
716 Otherwise return true, and DIFFER_P will record the result. This
717 utility will not be necessary when alias_sets_conflict_p will be
718 less conservative. */
721 vect_base_addr_differ_p (struct data_reference *dra,
722 struct data_reference *drb,
725 tree stmt_a = DR_STMT (dra);
726 stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
727 tree stmt_b = DR_STMT (drb);
728 stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
729 tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
730 tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
731 tree type_a = TREE_TYPE (addr_a);
732 tree type_b = TREE_TYPE (addr_b);
733 HOST_WIDE_INT alias_set_a, alias_set_b;
735 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
737 /* Both references are ADDR_EXPR, i.e., we have the objects. */
738 if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
739 return array_base_name_differ_p (dra, drb, differ_p);
741 alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
742 get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
743 alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
744 get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);
746 if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
752 /* An instruction writing through a restricted pointer is "independent" of any
753 instruction reading or writing through a different pointer, in the same
755 else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
756 || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
765 /* Function vect_analyze_data_ref_dependence.
767 Return TRUE if there (might) exist a dependence between a memory-reference
768 DRA and a memory-reference DRB. */
771 vect_analyze_data_ref_dependence (struct data_reference *dra,
772 struct data_reference *drb,
773 loop_vec_info loop_vinfo)
776 struct data_dependence_relation *ddr;
778 if (!vect_base_addr_differ_p (dra, drb, &differ_p))
780 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
781 LOOP_LOC (loop_vinfo)))
784 "not vectorized: can't determine dependence between: ");
785 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
786 fprintf (vect_dump, " and ");
787 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
795 ddr = initialize_data_dependence_relation (dra, drb);
796 compute_affine_dependence (ddr);
798 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
801 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
802 LOOP_LOC (loop_vinfo)))
805 "not vectorized: possible dependence between data-refs ");
806 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
807 fprintf (vect_dump, " and ");
808 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
815 /* Function vect_analyze_data_ref_dependences.
817 Examine all the data references in the loop, and make sure there do not
818 exist any data dependences between them.
820 TODO: dependences which distance is greater than the vectorization factor
824 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
827 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
828 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
830 /* Examine store-store (output) dependences. */
832 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
833 fprintf (vect_dump, "=== vect_analyze_dependences ===");
835 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
836 fprintf (vect_dump, "compare all store-store pairs.");
838 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
840 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
842 struct data_reference *dra =
843 VARRAY_GENERIC_PTR (loop_write_refs, i);
844 struct data_reference *drb =
845 VARRAY_GENERIC_PTR (loop_write_refs, j);
846 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
851 /* Examine load-store (true/anti) dependences. */
853 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
854 fprintf (vect_dump, "compare all load-store pairs.");
856 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
858 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
860 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
861 struct data_reference *drb =
862 VARRAY_GENERIC_PTR (loop_write_refs, j);
863 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
872 /* Function vect_compute_data_ref_alignment
874 Compute the misalignment of the data reference DR.
877 1. If during the misalignment computation it is found that the data reference
878 cannot be vectorized then false is returned.
879 2. DR_MISALIGNMENT (DR) is defined.
881 FOR NOW: No analysis is actually performed. Misalignment is calculated
882 only for trivial cases. TODO. */
885 vect_compute_data_ref_alignment (struct data_reference *dr)
887 tree stmt = DR_STMT (dr);
888 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
889 tree ref = DR_REF (dr);
891 tree base, alignment;
895 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
896 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
898 /* Initialize misalignment to unknown. */
899 DR_MISALIGNMENT (dr) = -1;
901 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
902 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
903 base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
904 vectype = STMT_VINFO_VECTYPE (stmt_info);
908 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
910 fprintf (vect_dump, "Unknown alignment for access: ");
911 print_generic_expr (vect_dump, base, TDF_SLIM);
918 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
920 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
922 fprintf (vect_dump, "can't force alignment of ref: ");
923 print_generic_expr (vect_dump, ref, TDF_SLIM);
928 /* Force the alignment of the decl.
929 NOTE: This is the only change to the code we make during
930 the analysis phase, before deciding to vectorize the loop. */
931 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
932 fprintf (vect_dump, "force alignment");
933 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
934 DECL_USER_ALIGN (base) = 1;
937 /* At this point we assume that the base is aligned. */
938 gcc_assert (base_aligned_p
939 || (TREE_CODE (base) == VAR_DECL
940 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
942 /* Alignment required, in bytes: */
943 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
945 /* Modulo alignment. */
946 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
947 if (tree_int_cst_sgn (misalign) < 0)
949 /* Negative misalignment value. */
950 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
951 fprintf (vect_dump, "unexpected misalign value");
955 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
957 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
958 fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));
964 /* Function vect_compute_data_refs_alignment
966 Compute the misalignment of data references in the loop.
967 This pass may take place at function granularity instead of at loop
970 FOR NOW: No analysis is actually performed. Misalignment is calculated
971 only for trivial cases. TODO. */
974 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
976 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
977 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
980 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
982 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
983 if (!vect_compute_data_ref_alignment (dr))
987 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
989 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
990 if (!vect_compute_data_ref_alignment (dr))
998 /* Function vect_enhance_data_refs_alignment
1000 This pass will use loop versioning and loop peeling in order to enhance
1001 the alignment of data references in the loop.
1003 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1004 original loop is to be vectorized; Any other loops that are created by
1005 the transformations performed in this pass - are not supposed to be
1006 vectorized. This restriction will be relaxed. */
1009 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1011 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1012 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1013 varray_type datarefs;
1014 struct data_reference *dr0 = NULL;
1017 /* Sigh, a hack to make targets that do not define UNITS_PER_SIMD_WORD
1018 bootstrap. Copy UNITS_PER_SIMD_WORD to a local variable to avoid a
1019 "division by zero" error. This error would be issued because we
1020 we do "... % UNITS_PER_SIMD_WORD" below, and UNITS_PER_SIMD_WORD
1021 defaults to 0 if it is not defined by the target. */
1022 int units_per_simd_word = UNITS_PER_SIMD_WORD;
1025 This pass will require a cost model to guide it whether to apply peeling
1026 or versioning or a combination of the two. For example, the scheme that
1027 intel uses when given a loop with several memory accesses, is as follows:
1028 choose one memory access ('p') which alignment you want to force by doing
1029 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1030 other accesses are not necessarily aligned, or (2) use loop versioning to
1031 generate one loop in which all accesses are aligned, and another loop in
1032 which only 'p' is necessarily aligned.
1034 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1035 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1036 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1038 Devising a cost model is the most critical aspect of this work. It will
1039 guide us on which access to peel for, whether to use loop versioning, how
1040 many versions to create, etc. The cost model will probably consist of
1041 generic considerations as well as target specific considerations (on
1042 powerpc for example, misaligned stores are more painful than misaligned
1045 Here is the general steps involved in alignment enhancements:
1047 -- original loop, before alignment analysis:
1048 for (i=0; i<N; i++){
1049 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1050 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1053 -- After vect_compute_data_refs_alignment:
1054 for (i=0; i<N; i++){
1055 x = q[i]; # DR_MISALIGNMENT(q) = 3
1056 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1059 -- Possibility 1: we do loop versioning:
1061 for (i=0; i<N; i++){ # loop 1A
1062 x = q[i]; # DR_MISALIGNMENT(q) = 3
1063 p[i] = y; # DR_MISALIGNMENT(p) = 0
1067 for (i=0; i<N; i++){ # loop 1B
1068 x = q[i]; # DR_MISALIGNMENT(q) = 3
1069 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1073 -- Possibility 2: we do loop peeling:
1074 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1078 for (i = 3; i < N; i++){ # loop 2A
1079 x = q[i]; # DR_MISALIGNMENT(q) = 0
1080 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1083 -- Possibility 3: combination of loop peeling and versioning:
1084 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1089 for (i = 3; i<N; i++){ # loop 3A
1090 x = q[i]; # DR_MISALIGNMENT(q) = 0
1091 p[i] = y; # DR_MISALIGNMENT(p) = 0
1095 for (i = 3; i<N; i++){ # loop 3B
1096 x = q[i]; # DR_MISALIGNMENT(q) = 0
1097 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1101 These loops are later passed to loop_transform to be vectorized. The
1102 vectorizer will use the alignment information to guide the transformation
1103 (whether to generate regular loads/stores, or with special handling for
1107 /* (1) Peeling to force alignment. */
1109 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1111 + How many accesses will become aligned due to the peeling
1112 - How many accesses will become unaligned due to the peeling,
1113 and the cost of misaligned accesses.
1114 - The cost of peeling (the extra runtime checks, the increase
1117 The scheme we use FORNOW: peel to force the alignment of the first
1118 misaligned store in the loop.
1119 Rationale: misaligned stores are not yet supported.
1121 TODO: Use a better cost model. */
1123 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1125 dr0 = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1126 if (!aligned_access_p (dr0))
1128 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1129 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1134 /* (1.2) Update the alignment info according to the peeling factor.
1135 If the misalignment of the DR we peel for is M, then the
1136 peeling factor is VF - M, and the misalignment of each access DR_i
1137 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
1138 If the misalignment of the DR we peel for is unknown, then the
1139 misalignment of each access DR_i in the loop is also unknown.
1141 TODO: - consider accesses that are known to have the same
1142 alignment, even if that alignment is unknown. */
1144 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1149 if (known_alignment_for_access_p (dr0))
1151 /* Since it's known at compile time, compute the number of iterations
1152 in the peeled loop (the peeling factor) for use in updating
1153 DR_MISALIGNMENT values. The peeling factor is the vectorization
1154 factor minus the misalignment as an element count. */
1155 mis = DR_MISALIGNMENT (dr0);
1156 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1157 npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
1160 datarefs = loop_write_datarefs;
1161 for (j = 0; j < 2; j++)
1163 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
1165 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
1169 if (known_alignment_for_access_p (dr)
1170 && DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr0))
1171 DR_MISALIGNMENT (dr) = 0;
1172 else if (known_alignment_for_access_p (dr)
1173 && known_alignment_for_access_p (dr0))
1175 int drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1177 DR_MISALIGNMENT (dr) += npeel * drsize;
1178 DR_MISALIGNMENT (dr) %= units_per_simd_word;
1181 DR_MISALIGNMENT (dr) = -1;
1183 datarefs = loop_read_datarefs;
1186 DR_MISALIGNMENT (dr0) = 0;
1191 /* Function vect_analyze_data_refs_alignment
1193 Analyze the alignment of the data-references in the loop.
1194 FOR NOW: Until support for misaligned accesses is in place, only if all
1195 accesses are aligned can the loop be vectorized. This restriction will be
1199 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1201 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1202 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1203 enum dr_alignment_support supportable_dr_alignment;
1206 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1207 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1210 /* This pass may take place at function granularity instead of at loop
1213 if (!vect_compute_data_refs_alignment (loop_vinfo))
1215 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1216 LOOP_LOC (loop_vinfo)))
1218 "not vectorized: can't calculate alignment for data ref.");
1223 /* This pass will decide on using loop versioning and/or loop peeling in
1224 order to enhance the alignment of data references in the loop. */
1226 vect_enhance_data_refs_alignment (loop_vinfo);
1229 /* Finally, check that all the data references in the loop can be
1230 handled with respect to their alignment. */
1232 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1234 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1235 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1236 if (!supportable_dr_alignment)
1238 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1239 LOOP_LOC (loop_vinfo)))
1240 fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
1243 if (supportable_dr_alignment != dr_aligned
1244 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
1245 fprintf (vect_dump, "Vectorizing an unaligned access.");
1247 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1249 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1250 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1251 if (!supportable_dr_alignment)
1253 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1254 LOOP_LOC (loop_vinfo)))
1255 fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
1258 if (supportable_dr_alignment != dr_aligned
1259 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
1260 fprintf (vect_dump, "Vectorizing an unaligned access.");
1262 if (LOOP_VINFO_UNALIGNED_DR (loop_vinfo)
1263 && vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
1264 fprintf (vect_dump, "Alignment of access forced using peeling.");
1270 /* Function vect_analyze_data_ref_access.
1272 Analyze the access pattern of the data-reference DR. For now, a data access
1273 has to be consecutive to be considered vectorizable. */
1276 vect_analyze_data_ref_access (struct data_reference *dr)
1278 tree stmt = DR_STMT (dr);
1279 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1280 tree step = STMT_VINFO_VECT_STEP (stmt_info);
1281 tree scalar_type = TREE_TYPE (DR_REF (dr));
1283 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1285 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1286 fprintf (vect_dump, "not consecutive access");
1293 /* Function vect_analyze_data_ref_accesses.
1295 Analyze the access pattern of all the data references in the loop.
1297 FORNOW: the only access pattern that is considered vectorizable is a
1298 simple step 1 (consecutive) access.
1300 FORNOW: handle only arrays and pointer accesses. */
1303 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1306 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1307 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1309 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1310 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1312 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1314 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1315 bool ok = vect_analyze_data_ref_access (dr);
1318 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1319 LOOP_LOC (loop_vinfo)))
1320 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1325 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1327 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1328 bool ok = vect_analyze_data_ref_access (dr);
1331 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1332 LOOP_LOC (loop_vinfo)))
1333 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1342 /* Function vect_analyze_pointer_ref_access.
1345 STMT - a stmt that contains a data-ref.
1346 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
1347 ACCESS_FN - the access function of MEMREF.
1350 If the data-ref access is vectorizable, return a data_reference structure
1351 that represents it (DR). Otherwise - return NULL.
1352 STEP - the stride of MEMREF in the loop.
1353 INIT - the initial condition of MEMREF in the loop.
1356 static struct data_reference *
1357 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
1358 tree access_fn, tree *ptr_init, tree *ptr_step)
1360 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1361 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1362 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1364 tree reftype, innertype;
1365 tree indx_access_fn;
1366 int loopnum = loop->num;
1367 struct data_reference *dr;
1369 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
1371 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1372 LOOP_LOC (loop_vinfo)))
1373 fprintf (vect_dump, "not vectorized: pointer access is not simple.");
1379 if (!expr_invariant_in_loop_p (loop, init))
1381 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1382 LOOP_LOC (loop_vinfo)))
1384 "not vectorized: initial condition is not loop invariant.");
1388 if (TREE_CODE (step) != INTEGER_CST)
1390 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1391 LOOP_LOC (loop_vinfo)))
1393 "not vectorized: non constant step for pointer access.");
1397 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
1398 if (!POINTER_TYPE_P (reftype))
1400 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1401 LOOP_LOC (loop_vinfo)))
1402 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
1406 if (!POINTER_TYPE_P (TREE_TYPE (init)))
1408 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1409 LOOP_LOC (loop_vinfo)))
1410 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
1414 *ptr_step = fold_convert (ssizetype, step);
1415 innertype = TREE_TYPE (reftype);
1416 if (!COMPLETE_TYPE_P (innertype))
1418 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1419 LOOP_LOC (loop_vinfo)))
1420 fprintf (vect_dump, "not vectorized: pointer to incomplete type.");
1424 /* Check that STEP is a multiple of type size. */
1425 if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
1426 fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype)))))
1428 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1429 LOOP_LOC (loop_vinfo)))
1430 fprintf (vect_dump, "not vectorized: non consecutive access.");
1435 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
1436 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1438 fprintf (vect_dump, "Access function of ptr indx: ");
1439 print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
1441 dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
1447 /* Function vect_address_analysis
1449 Return the BASE of the address expression EXPR.
1450 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1453 EXPR - the address expression that is being analyzed
1454 STMT - the statement that contains EXPR or its original memory reference
1455 IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
1456 VECTYPE - the type that defines the alignment (i.e, we compute
1457 alignment relative to TYPE_ALIGN(VECTYPE))
1458 DR - data_reference struct for the original memory reference
1461 BASE (returned value) - the base of the data reference EXPR.
1462 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1463 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1464 computation is impossible
1465 STEP - evolution of EXPR in the loop
1466 BASE_ALIGNED - indicates if BASE is aligned
1468 If something unexpected is encountered (an unsupported form of data-ref),
1469 then NULL_TREE is returned.
1473 vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
1474 struct data_reference *dr, tree *offset, tree *misalign,
1475 tree *step, bool *base_aligned)
1477 tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
1478 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1480 struct ptr_info_def *dummy1;
1483 switch (TREE_CODE (expr))
1487 /* EXPR is of form {base +/- offset} (or {offset +/- base}). */
1488 oprnd0 = TREE_OPERAND (expr, 0);
1489 oprnd1 = TREE_OPERAND (expr, 1);
1491 STRIP_NOPS (oprnd0);
1492 STRIP_NOPS (oprnd1);
1494 /* Recursively try to find the base of the address contained in EXPR.
1495 For offset, the returned base will be NULL. */
1496 base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
1497 &address_offset, &address_misalign, step,
1500 base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
1501 &address_offset, &address_misalign, step,
1504 /* We support cases where only one of the operands contains an
1506 if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
1509 /* To revert STRIP_NOPS. */
1510 oprnd0 = TREE_OPERAND (expr, 0);
1511 oprnd1 = TREE_OPERAND (expr, 1);
1513 offset_expr = base_addr0 ?
1514 fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);
1516 /* EXPR is of form {base +/- offset} (or {offset +/- base}). If offset is
1517 a number, we can add it to the misalignment value calculated for base,
1518 otherwise, misalignment is NULL. */
1519 if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
1520 *misalign = size_binop (TREE_CODE (expr), address_misalign,
1523 *misalign = NULL_TREE;
1525 /* Combine offset (from EXPR {base + offset}) with the offset calculated
1527 *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
1528 return base_addr0 ? base_addr0 : base_addr1;
1531 base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt,
1532 is_read, vectype, &dr, offset,
1533 misalign, step, base_aligned,
1534 &dummy, &dummy1, &dummy2);
1535 return base_address;
1538 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1541 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1543 if (vect_get_ptr_offset (expr, vectype, misalign))
1544 *base_aligned = true;
1546 *base_aligned = false;
1550 *base_aligned = true;
1551 *misalign = ssize_int (0);
1553 *offset = ssize_int (0);
1554 *step = ssize_int (0);
1563 /* Function vect_object_analysis
1565 Return the BASE of the data reference MEMREF.
1566 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1567 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1568 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1569 instantiated with initial_conditions of access_functions of variables,
1570 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1572 Function get_inner_reference is used for the above in case of ARRAY_REF and
1575 The structure of the function is as follows:
1577 Case 1. For handled_component_p refs
1578 1.1 call get_inner_reference
1579 1.1.1 analyze offset expr received from get_inner_reference
1580 1.2. build data-reference structure for MEMREF
1581 (fall through with BASE)
1582 Case 2. For declarations
1584 2.2 update DR_BASE_NAME if necessary for alias
1585 Case 3. For INDIRECT_REFs
1586 3.1 get the access function
1587 3.2 analyze evolution of MEMREF
1588 3.3 set data-reference structure for MEMREF
1589 3.4 call vect_address_analysis to analyze INIT of the access function
1592 Combine the results of object and address analysis to calculate
1593 INITIAL_OFFSET, STEP and misalignment info.
1596 MEMREF - the memory reference that is being analyzed
1597 STMT - the statement that contains MEMREF
1598 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
1599 VECTYPE - the type that defines the alignment (i.e, we compute
1600 alignment relative to TYPE_ALIGN(VECTYPE))
1603 BASE_ADDRESS (returned value) - the base address of the data reference MEMREF
1604 E.g, if MEMREF is a.b[k].c[i][j] the returned
1606 DR - data_reference struct for MEMREF
1607 INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
1608 MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
1609 the computation is impossible
1610 STEP - evolution of the DR_REF in the loop
1611 BASE_ALIGNED - indicates if BASE is aligned
1612 MEMTAG - memory tag for aliasing purposes
1613 PTR_INFO - NULL or points-to aliasing info from a pointer SSA_NAME
1614 SUBVAR - Sub-variables of the variable
1616 If something unexpected is encountered (an unsupported form of data-ref),
1617 then NULL_TREE is returned. */
1620 vect_object_analysis (tree memref, tree stmt, bool is_read,
1621 tree vectype, struct data_reference **dr,
1622 tree *offset, tree *misalign, tree *step,
1623 bool *base_aligned, tree *memtag,
1624 struct ptr_info_def **ptr_info, subvar_t *subvars)
1626 tree base = NULL_TREE, base_address = NULL_TREE;
1627 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1628 tree object_step = ssize_int (0), address_step = ssize_int (0);
1629 bool object_base_aligned = true, address_base_aligned = true;
1630 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1631 HOST_WIDE_INT pbitsize, pbitpos;
1632 tree poffset, bit_pos_in_bytes;
1633 enum machine_mode pmode;
1634 int punsignedp, pvolatilep;
1635 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1636 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1637 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1638 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1639 struct data_reference *ptr_dr = NULL;
1640 tree access_fn, evolution_part, address_to_analyze;
1645 /* Case 1. handled_component_p refs. */
1646 if (handled_component_p (memref))
1648 /* 1.1 call get_inner_reference. */
1649 /* Find the base and the offset from it. */
1650 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1651 &pmode, &punsignedp, &pvolatilep, false);
1655 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1657 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1658 &object_offset, &object_misalign, &object_step))
1660 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1662 fprintf (vect_dump, "failed to compute offset or step for ");
1663 print_generic_expr (vect_dump, memref, TDF_SLIM);
1668 /* Add bit position to OFFSET and MISALIGN. */
1670 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1671 /* Check that there is no remainder in bits. */
1672 if (pbitpos%BITS_PER_UNIT)
1674 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1675 fprintf (vect_dump, "bit offset alignment.");
1678 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1679 if (object_misalign)
1680 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1683 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1686 if (TREE_CODE (memref) == ARRAY_REF)
1687 *dr = analyze_array (stmt, memref, is_read);
1692 memref = base; /* To continue analysis of BASE. */
1696 /* Part 1: Case 2. Declarations. */
1697 if (DECL_P (memref))
1699 /* We expect to get a decl only if we already have a DR. */
1702 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1704 fprintf (vect_dump, "unhandled decl ");
1705 print_generic_expr (vect_dump, memref, TDF_SLIM);
1710 /* 2.1 check the alignment. */
1711 if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
1712 object_base_aligned = true;
1714 object_base_aligned = false;
1716 /* 2.2 update DR_BASE_NAME if necessary. */
1717 if (!DR_BASE_NAME ((*dr)))
1718 /* For alias analysis. In case the analysis of INDIRECT_REF brought
1720 DR_BASE_NAME ((*dr)) = memref;
1722 if (SSA_VAR_P (memref) && var_can_have_subvars (memref))
1723 *subvars = get_subvars_for_var (memref);
1724 base_address = build_fold_addr_expr (memref);
1728 /* Part 1: Case 3. INDIRECT_REFs. */
1729 else if (TREE_CODE (memref) == INDIRECT_REF)
1731 tree ptr_ref = TREE_OPERAND (memref, 0);
1732 if (TREE_CODE (ptr_ref) == SSA_NAME)
1733 *ptr_info = SSA_NAME_PTR_INFO (ptr_ref);
1735 /* 3.1 get the access function. */
1736 access_fn = analyze_scalar_evolution (loop, ptr_ref);
1739 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1740 LOOP_LOC (loop_vinfo)))
1741 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1744 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1746 fprintf (vect_dump, "Access function of ptr: ");
1747 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1750 /* 3.2 analyze evolution of MEMREF. */
1751 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
1754 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1755 access_fn, &ptr_init, &ptr_step);
1759 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1760 address_to_analyze = ptr_init;
1766 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1767 LOOP_LOC (loop_vinfo)))
1768 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1771 /* Since there exists DR for MEMREF, we are analyzing the init of
1772 the access function, which not necessary has evolution in the
1774 address_to_analyze = initial_condition_in_loop_num (access_fn,
1778 /* 3.3 set data-reference structure for MEMREF. */
1779 *dr = (*dr) ? *dr : ptr_dr;
1781 /* 3.4 call vect_address_analysis to analyze INIT of the access
1783 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1784 vectype, *dr, &address_offset, &address_misalign,
1785 &address_step, &address_base_aligned);
1789 switch (TREE_CODE (base_address))
1792 *memtag = get_var_ann (SSA_NAME_VAR (base_address))->type_mem_tag;
1793 if (!(*memtag) && TREE_CODE (TREE_OPERAND (memref, 0)) == SSA_NAME)
1794 *memtag = get_var_ann (
1795 SSA_NAME_VAR (TREE_OPERAND (memref, 0)))->type_mem_tag;
1798 *memtag = TREE_OPERAND (base_address, 0);
1801 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1802 LOOP_LOC (loop_vinfo)))
1804 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1805 print_generic_expr (vect_dump, memref, TDF_SLIM);
1812 /* MEMREF cannot be analyzed. */
1815 if (SSA_VAR_P (*memtag) && var_can_have_subvars (*memtag))
1816 *subvars = get_subvars_for_var (*memtag);
1818 /* Part 2: Combine the results of object and address analysis to calculate
1819 INITIAL_OFFSET, STEP and misalignment info. */
1820 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
1821 if (object_misalign && address_misalign)
1822 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1824 *misalign = NULL_TREE;
1825 *step = size_binop (PLUS_EXPR, object_step, address_step);
1826 *base_aligned = object_base_aligned && address_base_aligned;
1828 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1830 fprintf (vect_dump, "Results of object analysis for: ");
1831 print_generic_expr (vect_dump, memref, TDF_SLIM);
1832 fprintf (vect_dump, "\n\tbase_address: ");
1833 print_generic_expr (vect_dump, base_address, TDF_SLIM);
1834 fprintf (vect_dump, "\n\toffset: ");
1835 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1836 fprintf (vect_dump, "\n\tstep: ");
1837 print_generic_expr (vect_dump, *step, TDF_SLIM);
1838 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1839 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1841 return base_address;
1845 /* Function vect_analyze_data_refs.
1847 Find all the data references in the loop.
1849 The general structure of the analysis of data refs in the vectorizer is as
1851 1- vect_analyze_data_refs(loop):
1852 Find and analyze all data-refs in the loop:
1854 base_address = vect_object_analysis(ref)
1855 1.1- vect_object_analysis(ref):
1856 Analyze ref, and build a DR (data_reference struct) for it;
1857 compute base, initial_offset, step and alignment.
1858 Call get_inner_reference for refs handled in this function.
1859 Call vect_addr_analysis(addr) to analyze pointer type expressions.
1860 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment,
1861 ref_stmt.memtag, ref_stmt.ptr_info and ref_stmt.step accordingly.
1862 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
1863 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1864 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1866 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1867 which base is really an array (not a pointer) and which alignment
1868 can be forced. This restriction will be relaxed. */
1871 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1873 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1874 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1875 int nbbs = loop->num_nodes;
1876 block_stmt_iterator si;
1878 struct data_reference *dr;
1880 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1881 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
1883 for (j = 0; j < nbbs; j++)
1885 basic_block bb = bbs[j];
1886 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1888 bool is_read = false;
1889 tree stmt = bsi_stmt (si);
1890 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1891 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1892 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1893 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1894 varray_type *datarefs = NULL;
1895 int nvuses, nv_may_defs, nv_must_defs;
1897 tree scalar_type, vectype;
1898 tree base, offset, misalign, step, tag;
1899 struct ptr_info_def *ptr_info;
1901 subvar_t subvars = NULL;
1903 /* Assumption: there exists a data-ref in stmt, if and only if
1904 it has vuses/vdefs. */
1906 if (!vuses && !v_may_defs && !v_must_defs)
1909 nvuses = NUM_VUSES (vuses);
1910 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1911 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1913 if (nvuses && (nv_may_defs || nv_must_defs))
1915 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1917 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
1918 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1923 if (TREE_CODE (stmt) != MODIFY_EXPR)
1925 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1927 fprintf (vect_dump, "unexpected vops in stmt: ");
1928 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1935 memref = TREE_OPERAND (stmt, 1);
1936 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
1941 memref = TREE_OPERAND (stmt, 0);
1942 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1946 scalar_type = TREE_TYPE (memref);
1947 vectype = get_vectype_for_scalar_type (scalar_type);
1950 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1952 fprintf (vect_dump, "no vectype for stmt: ");
1953 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1954 fprintf (vect_dump, " scalar_type: ");
1955 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1957 /* It is not possible to vectorize this data reference. */
1960 /* Analyze MEMREF. If it is of a supported form, build data_reference
1961 struct for it (DR). */
1963 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
1964 &offset, &misalign, &step,
1965 &base_aligned, &tag, &ptr_info,
1969 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1970 LOOP_LOC (loop_vinfo)))
1972 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
1973 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1977 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
1978 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1979 STMT_VINFO_VECT_STEP (stmt_info) = step;
1980 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
1981 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
1982 STMT_VINFO_MEMTAG (stmt_info) = tag;
1983 STMT_VINFO_PTR_INFO (stmt_info) = ptr_info;
1984 STMT_VINFO_SUBVARS (stmt_info) = subvars;
1985 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1986 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
1987 STMT_VINFO_DATA_REF (stmt_info) = dr;
1995 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1997 /* Function vect_mark_relevant.
1999 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
2002 vect_mark_relevant (varray_type *worklist, tree stmt)
2004 stmt_vec_info stmt_info;
2006 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2007 fprintf (vect_dump, "mark relevant.");
2009 if (TREE_CODE (stmt) == PHI_NODE)
2011 VARRAY_PUSH_TREE (*worklist, stmt);
2015 stmt_info = vinfo_for_stmt (stmt);
2019 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2021 fprintf (vect_dump, "mark relevant: no stmt info!!.");
2022 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2027 if (STMT_VINFO_RELEVANT_P (stmt_info))
2029 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2030 fprintf (vect_dump, "already marked relevant.");
2034 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
2035 VARRAY_PUSH_TREE (*worklist, stmt);
2039 /* Function vect_stmt_relevant_p.
2041 Return true if STMT in loop that is represented by LOOP_VINFO is
2042 "relevant for vectorization".
2044 A stmt is considered "relevant for vectorization" if:
2045 - it has uses outside the loop.
2046 - it has vdefs (it alters memory).
2047 - control stmts in the loop (except for the exit condition).
2049 CHECKME: what other side effects would the vectorizer allow? */
2052 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
2054 v_may_def_optype v_may_defs;
2055 v_must_def_optype v_must_defs;
2056 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2057 ssa_op_iter op_iter;
2058 imm_use_iterator imm_iter;
2059 use_operand_p use_p;
2062 /* cond stmt other than loop exit cond. */
2063 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
2066 /* changing memory. */
2067 if (TREE_CODE (stmt) != PHI_NODE)
2069 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2070 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2071 if (v_may_defs || v_must_defs)
2073 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2074 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
2079 /* uses outside the loop. */
2080 FOR_EACH_SSA_TREE_OPERAND (var, stmt, op_iter, SSA_OP_DEF)
2082 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, var)
2084 basic_block bb = bb_for_stmt (USE_STMT (use_p));
2085 if (!flow_bb_inside_loop_p (loop, bb))
2087 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2088 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
2098 /* Function vect_mark_stmts_to_be_vectorized.
2100 Not all stmts in the loop need to be vectorized. For example:
2109 Stmt 1 and 3 do not need to be vectorized, because loop control and
2110 addressing of vectorized data-refs are handled differently.
2112 This pass detects such stmts. */
2115 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2117 varray_type worklist;
2118 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2119 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2120 unsigned int nbbs = loop->num_nodes;
2121 block_stmt_iterator si;
2127 stmt_vec_info stmt_info;
2131 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2132 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
2135 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2137 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2139 fprintf (vect_dump, "init: phi relevant? ");
2140 print_generic_expr (vect_dump, phi, TDF_SLIM);
2143 if (vect_stmt_relevant_p (phi, loop_vinfo))
2145 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2146 LOOP_LOC (loop_vinfo)))
2147 fprintf (vect_dump, "unsupported reduction/induction.");
2152 VARRAY_TREE_INIT (worklist, 64, "work list");
2154 /* 1. Init worklist. */
2156 for (i = 0; i < nbbs; i++)
2159 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2161 stmt = bsi_stmt (si);
2163 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2165 fprintf (vect_dump, "init: stmt relevant? ");
2166 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2169 stmt_info = vinfo_for_stmt (stmt);
2170 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2172 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2173 vect_mark_relevant (&worklist, stmt);
2178 /* 2. Process_worklist */
2180 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
2182 stmt = VARRAY_TOP_TREE (worklist);
2183 VARRAY_POP (worklist);
2185 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2187 fprintf (vect_dump, "worklist: examine stmt: ");
2188 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2191 /* Examine the USES in this statement. Mark all the statements which
2192 feed this statement's uses as "relevant", unless the USE is used as
2195 if (TREE_CODE (stmt) == PHI_NODE)
2197 /* follow the def-use chain inside the loop. */
2198 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
2200 tree arg = PHI_ARG_DEF (stmt, j);
2201 tree def_stmt = NULL_TREE;
2203 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
2205 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2206 LOOP_LOC (loop_vinfo)))
2207 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2208 varray_clear (worklist);
2214 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2216 fprintf (vect_dump, "worklist: def_stmt: ");
2217 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
2220 bb = bb_for_stmt (def_stmt);
2221 if (flow_bb_inside_loop_p (loop, bb))
2222 vect_mark_relevant (&worklist, def_stmt);
2226 ann = stmt_ann (stmt);
2227 use_ops = USE_OPS (ann);
2229 for (i = 0; i < NUM_USES (use_ops); i++)
2231 tree use = USE_OP (use_ops, i);
2233 /* We are only interested in uses that need to be vectorized. Uses
2234 that are used for address computation are not considered relevant.
2236 if (exist_non_indexing_operands_for_use_p (use, stmt))
2238 tree def_stmt = NULL_TREE;
2240 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
2242 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2243 LOOP_LOC (loop_vinfo)))
2244 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2245 varray_clear (worklist);
2252 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2254 fprintf (vect_dump, "worklist: examine use %d: ", i);
2255 print_generic_expr (vect_dump, use, TDF_SLIM);
2258 bb = bb_for_stmt (def_stmt);
2259 if (flow_bb_inside_loop_p (loop, bb))
2260 vect_mark_relevant (&worklist, def_stmt);
2263 } /* while worklist */
2265 varray_clear (worklist);
2270 /* Function vect_can_advance_ivs_p
2272 In case the number of iterations that LOOP iterates in unknown at compile
2273 time, an epilog loop will be generated, and the loop induction variables
2274 (IVs) will be "advanced" to the value they are supposed to take just before
2275 the epilog loop. Here we check that the access function of the loop IVs
2276 and the expression that represents the loop bound are simple enough.
2277 These restrictions will be relaxed in the future. */
2280 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2282 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2283 basic_block bb = loop->header;
2286 /* Analyze phi functions of the loop header. */
2288 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2290 tree access_fn = NULL;
2291 tree evolution_part;
2293 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2295 fprintf (vect_dump, "Analyze phi: ");
2296 print_generic_expr (vect_dump, phi, TDF_SLIM);
2299 /* Skip virtual phi's. The data dependences that are associated with
2300 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2302 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2304 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2305 fprintf (vect_dump, "virtual phi. skip.");
2309 /* Analyze the evolution function. */
2311 access_fn = instantiate_parameters
2312 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2316 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2317 fprintf (vect_dump, "No Access function.");
2321 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2323 fprintf (vect_dump, "Access function of PHI: ");
2324 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
2327 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2329 if (evolution_part == NULL_TREE)
2332 /* FORNOW: We do not transform initial conditions of IVs
2333 which evolution functions are a polynomial of degree >= 2. */
2335 if (tree_is_chrec (evolution_part))
2343 /* Function vect_get_loop_niters.
2345 Determine how many iterations the loop is executed.
2346 If an expression that represents the number of iterations
2347 can be constructed, place it in NUMBER_OF_ITERATIONS.
2348 Return the loop exit condition. */
2351 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2355 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2356 fprintf (vect_dump, "=== get_loop_niters ===");
2358 niters = number_of_iterations_in_loop (loop);
2360 if (niters != NULL_TREE
2361 && niters != chrec_dont_know)
2363 *number_of_iterations = niters;
2365 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2367 fprintf (vect_dump, "==> get_loop_niters:" );
2368 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
2372 return get_loop_exit_condition (loop);
2376 /* Function vect_analyze_loop_form.
2378 Verify the following restrictions (some may be relaxed in the future):
2379 - it's an inner-most loop
2380 - number of BBs = 2 (which are the loop header and the latch)
2381 - the loop has a pre-header
2382 - the loop has a single entry and exit
2383 - the loop exit condition is simple enough, and the number of iterations
2384 can be analyzed (a countable loop). */
2386 static loop_vec_info
2387 vect_analyze_loop_form (struct loop *loop)
2389 loop_vec_info loop_vinfo;
2391 tree number_of_iterations = NULL;
2394 loop_loc = find_loop_location (loop);
2396 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2397 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
2401 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2402 fprintf (vect_dump, "not vectorized: nested loop.");
2406 if (!loop->single_exit
2407 || loop->num_nodes != 2
2408 || EDGE_COUNT (loop->header->preds) != 2)
2410 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2412 if (!loop->single_exit)
2413 fprintf (vect_dump, "not vectorized: multiple exits.");
2414 else if (loop->num_nodes != 2)
2415 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2416 else if (EDGE_COUNT (loop->header->preds) != 2)
2417 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2423 /* We assume that the loop exit condition is at the end of the loop. i.e,
2424 that the loop is represented as a do-while (with a proper if-guard
2425 before the loop if needed), where the loop header contains all the
2426 executable statements, and the latch is empty. */
2427 if (!empty_block_p (loop->latch))
2429 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2430 fprintf (vect_dump, "not vectorized: unexpectd loop form.");
2434 /* Make sure there exists a single-predecessor exit bb: */
2435 if (!single_pred_p (loop->single_exit->dest))
2437 edge e = loop->single_exit;
2438 if (!(e->flags & EDGE_ABNORMAL))
2440 split_loop_exit_edge (e);
2441 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2442 fprintf (vect_dump, "split exit edge.");
2446 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2447 fprintf (vect_dump, "not vectorized: abnormal loop exit edge.");
2452 if (empty_block_p (loop->header))
2454 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2455 fprintf (vect_dump, "not vectorized: empty loop.");
2459 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
2462 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2463 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2467 if (!number_of_iterations)
2469 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2471 "not vectorized: number of iterations cannot be computed.");
2475 if (chrec_contains_undetermined (number_of_iterations))
2477 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2478 fprintf (vect_dump, "Infinite number of iterations.");
2482 loop_vinfo = new_loop_vec_info (loop);
2483 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2485 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2487 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2489 fprintf (vect_dump, "Symbolic number of iterations is ");
2490 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS);
2494 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2496 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2497 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
2501 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2502 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2508 /* Function vect_analyze_loop.
2510 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2511 for it. The different analyses will record information in the
2512 loop_vec_info struct. */
2514 vect_analyze_loop (struct loop *loop)
2517 loop_vec_info loop_vinfo;
2519 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2520 fprintf (vect_dump, "===== analyze_loop_nest =====");
2522 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2524 loop_vinfo = vect_analyze_loop_form (loop);
2527 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2528 fprintf (vect_dump, "bad loop form.");
2532 /* Find all data references in the loop (which correspond to vdefs/vuses)
2533 and analyze their evolution in the loop.
2535 FORNOW: Handle only simple, array references, which
2536 alignment can be forced, and aligned pointer-references. */
2538 ok = vect_analyze_data_refs (loop_vinfo);
2541 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2542 fprintf (vect_dump, "bad data references.");
2543 destroy_loop_vec_info (loop_vinfo);
2547 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2549 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2552 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2553 fprintf (vect_dump, "unexpected pattern.");
2554 destroy_loop_vec_info (loop_vinfo);
2558 /* Check that all cross-iteration scalar data-flow cycles are OK.
2559 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2561 ok = vect_analyze_scalar_cycles (loop_vinfo);
2564 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2565 fprintf (vect_dump, "bad scalar cycle.");
2566 destroy_loop_vec_info (loop_vinfo);
2570 /* Analyze data dependences between the data-refs in the loop.
2571 FORNOW: fail at the first data dependence that we encounter. */
2573 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2576 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2577 fprintf (vect_dump, "bad data dependence.");
2578 destroy_loop_vec_info (loop_vinfo);
2582 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2583 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2585 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2588 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2589 fprintf (vect_dump, "bad data access.");
2590 destroy_loop_vec_info (loop_vinfo);
2594 ok = vect_determine_vectorization_factor (loop_vinfo);
2597 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2598 fprintf (vect_dump, "can't determine vectorization factor.");
2599 destroy_loop_vec_info (loop_vinfo);
2603 /* Analyze the alignment of the data-refs in the loop.
2604 FORNOW: Only aligned accesses are handled. */
2606 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2609 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2610 fprintf (vect_dump, "bad data alignment.");
2611 destroy_loop_vec_info (loop_vinfo);
2615 /* Scan all the operations in the loop and make sure they are
2618 ok = vect_analyze_operations (loop_vinfo);
2621 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2622 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2623 destroy_loop_vec_info (loop_vinfo);
2627 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;