1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-data-ref.h"
39 #include "tree-scalar-evolution.h"
40 #include "tree-vectorizer.h"
42 /* Main analysis functions. */
43 static loop_vec_info vect_analyze_loop_form (struct loop *);
44 static bool vect_analyze_data_refs (loop_vec_info);
45 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
46 static bool vect_analyze_scalar_cycles (loop_vec_info);
47 static bool vect_analyze_data_ref_accesses (loop_vec_info);
48 static bool vect_analyze_data_ref_dependences (loop_vec_info);
49 static bool vect_analyze_data_refs_alignment (loop_vec_info);
50 static bool vect_compute_data_refs_alignment (loop_vec_info);
51 static void vect_enhance_data_refs_alignment (loop_vec_info);
52 static bool vect_analyze_operations (loop_vec_info);
54 /* Utility functions for the analyses. */
55 static bool exist_non_indexing_operands_for_use_p (tree, tree);
56 static void vect_mark_relevant (varray_type *, tree);
57 static bool vect_stmt_relevant_p (tree, loop_vec_info);
58 static tree vect_get_loop_niters (struct loop *, tree *);
59 static bool vect_analyze_data_ref_dependence
60 (struct data_reference *, struct data_reference *, loop_vec_info);
61 static bool vect_compute_data_ref_alignment (struct data_reference *);
62 static bool vect_analyze_data_ref_access (struct data_reference *);
63 static struct data_reference * vect_analyze_pointer_ref_access
64 (tree, tree, bool, tree, tree *, tree *);
65 static bool vect_can_advance_ivs_p (loop_vec_info);
66 static tree vect_get_ptr_offset (tree, tree, tree *);
67 static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
69 static bool vect_base_addr_differ_p (struct data_reference *,
70 struct data_reference *drb, bool *);
71 static tree vect_object_analysis (tree, tree, bool, tree,
72 struct data_reference **, tree *, tree *,
74 static tree vect_address_analysis (tree, tree, bool, tree,
75 struct data_reference *, tree *, tree *,
77 static tree vect_get_memtag (tree, struct data_reference *);
80 /* Function vect_get_ptr_offset
82 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
85 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
86 tree vectype ATTRIBUTE_UNUSED,
87 tree *offset ATTRIBUTE_UNUSED)
89 /* TODO: Use alignment information. */
94 /* Function vect_analyze_offset_expr
96 Given an offset expression EXPR received from get_inner_reference, analyze
97 it and create an expression for INITIAL_OFFSET by substituting the variables
98 of EXPR with initial_condition of the corresponding access_fn in the loop.
101 for (j = 3; j < N; j++)
104 For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'. 'i' cannot be
105 substituted, since its access_fn in the inner loop is i. 'j' will be
106 substituted with 3. An INITIAL_OFFSET will be 'i * C_i + C`', where
109 Compute MISALIGN (the misalignment of the data reference initial access from
110 its base) if possible. Misalignment can be calculated only if all the
111 variables can be substituted with constants, or if a variable is multiplied
112 by a multiple of VECTYPE_ALIGNMENT. In the above example, since 'i' cannot
113 be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
114 of VECTYPE_ALIGNMENT, and C` otherwise. (We perform MISALIGN modulo
115 VECTYPE_ALIGNMENT computation in the caller of this function).
117 STEP is an evolution of the data reference in this loop in bytes.
118 In the above example, STEP is C_j.
120 Return FALSE, if the analysis fails, e.g., there is no access_fn for a
121 variable. In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
122 are NULL_TREEs. Otherwise, return TRUE.
127 vect_analyze_offset_expr (tree expr,
129 tree vectype_alignment,
130 tree *initial_offset,
136 tree left_offset = ssize_int (0);
137 tree right_offset = ssize_int (0);
138 tree left_misalign = ssize_int (0);
139 tree right_misalign = ssize_int (0);
140 tree left_step = ssize_int (0);
141 tree right_step = ssize_int (0);
143 tree init, evolution;
146 *misalign = NULL_TREE;
147 *initial_offset = NULL_TREE;
149 /* Strip conversions that don't narrow the mode. */
150 expr = vect_strip_conversion (expr);
156 if (TREE_CODE (expr) == INTEGER_CST)
158 *initial_offset = fold_convert (ssizetype, expr);
159 *misalign = fold_convert (ssizetype, expr);
160 *step = ssize_int (0);
164 /* 2. Variable. Try to substitute with initial_condition of the corresponding
165 access_fn in the current loop. */
166 if (SSA_VAR_P (expr))
168 tree access_fn = analyze_scalar_evolution (loop, expr);
170 if (access_fn == chrec_dont_know)
174 init = initial_condition_in_loop_num (access_fn, loop->num);
175 if (init == expr && !expr_invariant_in_loop_p (loop, init))
176 /* Not enough information: may be not loop invariant.
177 E.g., for a[b[i]], we get a[D], where D=b[i]. EXPR is D, its
178 initial_condition is D, but it depends on i - loop's induction
182 evolution = evolution_part_in_loop_num (access_fn, loop->num);
183 if (evolution && TREE_CODE (evolution) != INTEGER_CST)
184 /* Evolution is not constant. */
187 if (TREE_CODE (init) == INTEGER_CST)
188 *misalign = fold_convert (ssizetype, init);
190 /* Not constant, misalignment cannot be calculated. */
191 *misalign = NULL_TREE;
193 *initial_offset = fold_convert (ssizetype, init);
195 *step = evolution ? fold_convert (ssizetype, evolution) : ssize_int (0);
199 /* Recursive computation. */
200 if (!BINARY_CLASS_P (expr))
202 /* We expect to get binary expressions (PLUS/MINUS and MULT). */
203 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
205 fprintf (vect_dump, "Not binary expression ");
206 print_generic_expr (vect_dump, expr, TDF_SLIM);
210 oprnd0 = TREE_OPERAND (expr, 0);
211 oprnd1 = TREE_OPERAND (expr, 1);
213 if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
214 &left_misalign, &left_step)
215 || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
216 &right_offset, &right_misalign, &right_step))
219 /* The type of the operation: plus, minus or mult. */
220 code = TREE_CODE (expr);
224 if (TREE_CODE (right_offset) != INTEGER_CST)
225 /* RIGHT_OFFSET can be not constant. For example, for arrays of variable
227 FORNOW: We don't support such cases. */
230 /* Strip conversions that don't narrow the mode. */
231 left_offset = vect_strip_conversion (left_offset);
234 /* Misalignment computation. */
235 if (SSA_VAR_P (left_offset))
237 /* If the left side contains variables that can't be substituted with
238 constants, we check if the right side is a multiple of ALIGNMENT.
240 if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
241 fold_convert (ssizetype, vectype_alignment))))
242 *misalign = ssize_int (0);
244 /* If the remainder is not zero or the right side isn't constant,
245 we can't compute misalignment. */
246 *misalign = NULL_TREE;
250 /* The left operand was successfully substituted with constant. */
252 /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
254 *misalign = size_binop (code, left_misalign, right_misalign);
256 *misalign = NULL_TREE;
259 /* Step calculation. */
260 /* Multiply the step by the right operand. */
261 *step = size_binop (MULT_EXPR, left_step, right_offset);
266 /* Combine the recursive calculations for step and misalignment. */
267 *step = size_binop (code, left_step, right_step);
269 if (left_misalign && right_misalign)
270 *misalign = size_binop (code, left_misalign, right_misalign);
272 *misalign = NULL_TREE;
280 /* Compute offset. */
281 *initial_offset = fold_convert (ssizetype,
282 fold (build2 (code, TREE_TYPE (left_offset),
289 /* Function vect_analyze_operations.
291 Scan the loop stmts and make sure they are all vectorizable. */
294 vect_analyze_operations (loop_vec_info loop_vinfo)
296 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
297 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
298 int nbbs = loop->num_nodes;
299 block_stmt_iterator si;
300 unsigned int vectorization_factor = 0;
305 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
306 fprintf (vect_dump, "=== vect_analyze_operations ===");
308 for (i = 0; i < nbbs; i++)
310 basic_block bb = bbs[i];
312 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
314 tree stmt = bsi_stmt (si);
316 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
319 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
321 fprintf (vect_dump, "==> examining statement: ");
322 print_generic_expr (vect_dump, stmt, TDF_SLIM);
325 gcc_assert (stmt_info);
327 /* skip stmts which do not need to be vectorized.
328 this is expected to include:
329 - the COND_EXPR which is the loop exit condition
330 - any LABEL_EXPRs in the loop
331 - computations that are used only for array indexing or loop
334 if (!STMT_VINFO_RELEVANT_P (stmt_info))
336 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
337 fprintf (vect_dump, "irrelevant.");
341 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
343 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
344 LOOP_LOC (loop_vinfo)))
346 fprintf (vect_dump, "not vectorized: vector stmt in loop:");
347 print_generic_expr (vect_dump, stmt, TDF_SLIM);
352 if (STMT_VINFO_DATA_REF (stmt_info))
353 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
354 else if (TREE_CODE (stmt) == MODIFY_EXPR)
355 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
357 scalar_type = TREE_TYPE (stmt);
359 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
361 fprintf (vect_dump, "get vectype for scalar type: ");
362 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
365 vectype = get_vectype_for_scalar_type (scalar_type);
368 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
369 LOOP_LOC (loop_vinfo)))
372 "not vectorized: unsupported data-type ");
373 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
378 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
380 fprintf (vect_dump, "vectype: ");
381 print_generic_expr (vect_dump, vectype, TDF_SLIM);
383 STMT_VINFO_VECTYPE (stmt_info) = vectype;
385 ok = (vectorizable_operation (stmt, NULL, NULL)
386 || vectorizable_assignment (stmt, NULL, NULL)
387 || vectorizable_load (stmt, NULL, NULL)
388 || vectorizable_store (stmt, NULL, NULL));
392 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
393 LOOP_LOC (loop_vinfo)))
395 fprintf (vect_dump, "not vectorized: stmt not supported: ");
396 print_generic_expr (vect_dump, stmt, TDF_SLIM);
401 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
402 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
403 fprintf (vect_dump, "nunits = %d", nunits);
405 if (vectorization_factor)
407 /* FORNOW: don't allow mixed units.
408 This restriction will be relaxed in the future. */
409 if (nunits != vectorization_factor)
411 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
412 LOOP_LOC (loop_vinfo)))
413 fprintf (vect_dump, "not vectorized: mixed data-types");
418 vectorization_factor = nunits;
420 #ifdef ENABLE_CHECKING
421 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
422 * vectorization_factor == UNITS_PER_SIMD_WORD);
427 /* TODO: Analyze cost. Decide if worth while to vectorize. */
429 if (vectorization_factor <= 1)
431 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
432 LOOP_LOC (loop_vinfo)))
433 fprintf (vect_dump, "not vectorized: unsupported data-type");
436 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
438 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
439 && vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
441 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
442 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
444 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
445 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
447 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
448 LOOP_LOC (loop_vinfo)))
449 fprintf (vect_dump, "not vectorized: iteration count too small.");
453 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
454 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
456 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
457 fprintf (vect_dump, "epilog loop required.");
458 if (!vect_can_advance_ivs_p (loop_vinfo))
460 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
461 LOOP_LOC (loop_vinfo)))
463 "not vectorized: can't create epilog loop 1.");
466 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
468 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
469 LOOP_LOC (loop_vinfo)))
471 "not vectorized: can't create epilog loop 2.");
480 /* Function exist_non_indexing_operands_for_use_p
482 USE is one of the uses attached to STMT. Check if USE is
483 used in STMT for anything other than indexing an array. */
486 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
489 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
491 /* USE corresponds to some operand in STMT. If there is no data
492 reference in STMT, then any operand that corresponds to USE
493 is not indexing an array. */
494 if (!STMT_VINFO_DATA_REF (stmt_info))
497 /* STMT has a data_ref. FORNOW this means that its of one of
501 (This should have been verified in analyze_data_refs).
503 'var' in the second case corresponds to a def, not a use,
504 so USE cannot correspond to any operands that are not used
507 Therefore, all we need to check is if STMT falls into the
508 first case, and whether var corresponds to USE. */
510 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
513 operand = TREE_OPERAND (stmt, 1);
515 if (TREE_CODE (operand) != SSA_NAME)
525 /* Function vect_analyze_scalar_cycles.
527 Examine the cross iteration def-use cycles of scalar variables, by
528 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
529 cycles that they represent do not impede vectorization.
531 FORNOW: Reduction as in the following loop, is not supported yet:
535 The cross-iteration cycle corresponding to variable 'sum' will be
536 considered too complicated and will impede vectorization.
538 FORNOW: Induction as in the following loop, is not supported yet:
543 However, the following loop *is* vectorizable:
548 In both loops there exists a def-use cycle for the variable i:
549 loop: i_2 = PHI (i_0, i_1)
554 The evolution of the above cycle is considered simple enough,
555 however, we also check that the cycle does not need to be
556 vectorized, i.e - we check that the variable that this cycle
557 defines is only used for array indexing or in stmts that do not
558 need to be vectorized. This is not the case in loop2, but it
559 *is* the case in loop3. */
562 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
565 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
566 basic_block bb = loop->header;
569 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
570 fprintf (vect_dump, "=== vect_analyze_scalar_cycles ===");
572 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
574 tree access_fn = NULL;
576 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
578 fprintf (vect_dump, "Analyze phi: ");
579 print_generic_expr (vect_dump, phi, TDF_SLIM);
582 /* Skip virtual phi's. The data dependences that are associated with
583 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
585 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
587 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
588 fprintf (vect_dump, "virtual phi. skip.");
592 /* Analyze the evolution function. */
594 /* FORNOW: The only scalar cross-iteration cycles that we allow are
595 those of loop induction variables; This property is verified here.
597 Furthermore, if that induction variable is used in an operation
598 that needs to be vectorized (i.e, is not solely used to index
599 arrays and check the exit condition) - we do not support its
600 vectorization yet. This property is verified in vect_is_simple_use,
601 during vect_analyze_operations. */
603 access_fn = /* instantiate_parameters
605 analyze_scalar_evolution (loop, PHI_RESULT (phi));
609 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
610 LOOP_LOC (loop_vinfo)))
611 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
615 if (vect_print_dump_info (REPORT_DETAILS,
616 LOOP_LOC (loop_vinfo)))
618 fprintf (vect_dump, "Access function of PHI: ");
619 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
622 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy, &dummy))
624 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
625 LOOP_LOC (loop_vinfo)))
626 fprintf (vect_dump, "not vectorized: unsupported scalar cycle.");
635 /* Function vect_base_addr_differ_p.
637 This is the simplest data dependence test: determines whether the
638 data references A and B access the same array/region. Returns
639 false when the property is not computable at compile time.
640 Otherwise return true, and DIFFER_P will record the result. This
641 utility will not be necessary when alias_sets_conflict_p will be
642 less conservative. */
645 vect_base_addr_differ_p (struct data_reference *dra,
646 struct data_reference *drb,
649 tree stmt_a = DR_STMT (dra);
650 stmt_vec_info stmt_info_a = vinfo_for_stmt (stmt_a);
651 tree stmt_b = DR_STMT (drb);
652 stmt_vec_info stmt_info_b = vinfo_for_stmt (stmt_b);
653 tree addr_a = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_a);
654 tree addr_b = STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info_b);
655 tree type_a = TREE_TYPE (addr_a);
656 tree type_b = TREE_TYPE (addr_b);
657 HOST_WIDE_INT alias_set_a, alias_set_b;
659 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
661 /* Both references are ADDR_EXPR, i.e., we have the objects. */
662 if (TREE_CODE (addr_a) == ADDR_EXPR && TREE_CODE (addr_b) == ADDR_EXPR)
663 return array_base_name_differ_p (dra, drb, differ_p);
665 alias_set_a = (TREE_CODE (addr_a) == ADDR_EXPR) ?
666 get_alias_set (TREE_OPERAND (addr_a, 0)) : get_alias_set (addr_a);
667 alias_set_b = (TREE_CODE (addr_b) == ADDR_EXPR) ?
668 get_alias_set (TREE_OPERAND (addr_b, 0)) : get_alias_set (addr_b);
670 if (!alias_sets_conflict_p (alias_set_a, alias_set_b))
676 /* An instruction writing through a restricted pointer is "independent" of any
677 instruction reading or writing through a different pointer, in the same
679 else if ((TYPE_RESTRICT (type_a) && !DR_IS_READ (dra))
680 || (TYPE_RESTRICT (type_b) && !DR_IS_READ (drb)))
689 /* Function vect_analyze_data_ref_dependence.
691 Return TRUE if there (might) exist a dependence between a memory-reference
692 DRA and a memory-reference DRB. */
695 vect_analyze_data_ref_dependence (struct data_reference *dra,
696 struct data_reference *drb,
697 loop_vec_info loop_vinfo)
700 struct data_dependence_relation *ddr;
702 if (!vect_base_addr_differ_p (dra, drb, &differ_p))
704 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
705 LOOP_LOC (loop_vinfo)))
708 "not vectorized: can't determine dependence between: ");
709 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
710 fprintf (vect_dump, " and ");
711 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
719 ddr = initialize_data_dependence_relation (dra, drb);
720 compute_affine_dependence (ddr);
722 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
725 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
726 LOOP_LOC (loop_vinfo)))
729 "not vectorized: possible dependence between data-refs ");
730 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
731 fprintf (vect_dump, " and ");
732 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
739 /* Function vect_analyze_data_ref_dependences.
741 Examine all the data references in the loop, and make sure there do not
742 exist any data dependences between them.
744 TODO: dependences which distance is greater than the vectorization factor
748 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
751 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
752 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
754 /* Examine store-store (output) dependences. */
756 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
757 fprintf (vect_dump, "=== vect_analyze_dependences ===");
759 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
760 fprintf (vect_dump, "compare all store-store pairs.");
762 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
764 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
766 struct data_reference *dra =
767 VARRAY_GENERIC_PTR (loop_write_refs, i);
768 struct data_reference *drb =
769 VARRAY_GENERIC_PTR (loop_write_refs, j);
770 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
775 /* Examine load-store (true/anti) dependences. */
777 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
778 fprintf (vect_dump, "compare all load-store pairs.");
780 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
782 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
784 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
785 struct data_reference *drb =
786 VARRAY_GENERIC_PTR (loop_write_refs, j);
787 if (vect_analyze_data_ref_dependence (dra, drb, loop_vinfo))
796 /* Function vect_compute_data_ref_alignment
798 Compute the misalignment of the data reference DR.
801 1. If during the misalignment computation it is found that the data reference
802 cannot be vectorized then false is returned.
803 2. DR_MISALIGNMENT (DR) is defined.
805 FOR NOW: No analysis is actually performed. Misalignment is calculated
806 only for trivial cases. TODO. */
809 vect_compute_data_ref_alignment (struct data_reference *dr)
811 tree stmt = DR_STMT (dr);
812 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
813 tree ref = DR_REF (dr);
815 tree base, alignment;
819 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
820 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
822 /* Initialize misalignment to unknown. */
823 DR_MISALIGNMENT (dr) = -1;
825 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
826 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
827 base = build_fold_indirect_ref (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
828 vectype = STMT_VINFO_VECTYPE (stmt_info);
832 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
834 fprintf (vect_dump, "Unknown alignment for access: ");
835 print_generic_expr (vect_dump, base, TDF_SLIM);
842 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
844 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
846 fprintf (vect_dump, "can't force alignment of ref: ");
847 print_generic_expr (vect_dump, ref, TDF_SLIM);
852 /* Force the alignment of the decl.
853 NOTE: This is the only change to the code we make during
854 the analysis phase, before deciding to vectorize the loop. */
855 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
856 fprintf (vect_dump, "force alignment");
857 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
858 DECL_USER_ALIGN (base) = 1;
861 /* At this point we assume that the base is aligned. */
862 gcc_assert (base_aligned_p
863 || (TREE_CODE (base) == VAR_DECL
864 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
866 /* Alignment required, in bytes: */
867 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
869 /* Modulo alignment. */
870 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
871 if (tree_int_cst_sgn (misalign) < 0)
873 /* Negative misalignment value. */
874 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
875 fprintf (vect_dump, "unexpected misalign value");
879 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
881 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
882 fprintf (vect_dump, "misalign = %d bytes", DR_MISALIGNMENT (dr));
888 /* Function vect_compute_data_refs_alignment
890 Compute the misalignment of data references in the loop.
891 This pass may take place at function granularity instead of at loop
894 FOR NOW: No analysis is actually performed. Misalignment is calculated
895 only for trivial cases. TODO. */
898 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
900 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
901 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
904 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
906 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
907 if (!vect_compute_data_ref_alignment (dr))
911 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
913 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
914 if (!vect_compute_data_ref_alignment (dr))
922 /* Function vect_enhance_data_refs_alignment
924 This pass will use loop versioning and loop peeling in order to enhance
925 the alignment of data references in the loop.
927 FOR NOW: we assume that whatever versioning/peeling takes place, only the
928 original loop is to be vectorized; Any other loops that are created by
929 the transformations performed in this pass - are not supposed to be
930 vectorized. This restriction will be relaxed. */
933 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
935 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
936 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
940 This pass will require a cost model to guide it whether to apply peeling
941 or versioning or a combination of the two. For example, the scheme that
942 intel uses when given a loop with several memory accesses, is as follows:
943 choose one memory access ('p') which alignment you want to force by doing
944 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
945 other accesses are not necessarily aligned, or (2) use loop versioning to
946 generate one loop in which all accesses are aligned, and another loop in
947 which only 'p' is necessarily aligned.
949 ("Automatic Intra-Register Vectorization for the Intel Architecture",
950 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
951 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
953 Devising a cost model is the most critical aspect of this work. It will
954 guide us on which access to peel for, whether to use loop versioning, how
955 many versions to create, etc. The cost model will probably consist of
956 generic considerations as well as target specific considerations (on
957 powerpc for example, misaligned stores are more painful than misaligned
960 Here is the general steps involved in alignment enhancements:
962 -- original loop, before alignment analysis:
964 x = q[i]; # DR_MISALIGNMENT(q) = unknown
965 p[i] = y; # DR_MISALIGNMENT(p) = unknown
968 -- After vect_compute_data_refs_alignment:
970 x = q[i]; # DR_MISALIGNMENT(q) = 3
971 p[i] = y; # DR_MISALIGNMENT(p) = unknown
974 -- Possibility 1: we do loop versioning:
976 for (i=0; i<N; i++){ # loop 1A
977 x = q[i]; # DR_MISALIGNMENT(q) = 3
978 p[i] = y; # DR_MISALIGNMENT(p) = 0
982 for (i=0; i<N; i++){ # loop 1B
983 x = q[i]; # DR_MISALIGNMENT(q) = 3
984 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
988 -- Possibility 2: we do loop peeling:
989 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
993 for (i = 3; i < N; i++){ # loop 2A
994 x = q[i]; # DR_MISALIGNMENT(q) = 0
995 p[i] = y; # DR_MISALIGNMENT(p) = unknown
998 -- Possibility 3: combination of loop peeling and versioning:
999 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1004 for (i = 3; i<N; i++){ # loop 3A
1005 x = q[i]; # DR_MISALIGNMENT(q) = 0
1006 p[i] = y; # DR_MISALIGNMENT(p) = 0
1010 for (i = 3; i<N; i++){ # loop 3B
1011 x = q[i]; # DR_MISALIGNMENT(q) = 0
1012 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1016 These loops are later passed to loop_transform to be vectorized. The
1017 vectorizer will use the alignment information to guide the transformation
1018 (whether to generate regular loads/stores, or with special handling for
1022 /* (1) Peeling to force alignment. */
1024 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1026 + How many accesses will become aligned due to the peeling
1027 - How many accesses will become unaligned due to the peeling,
1028 and the cost of misaligned accesses.
1029 - The cost of peeling (the extra runtime checks, the increase
1032 The scheme we use FORNOW: peel to force the alignment of the first
1033 misaligned store in the loop.
1034 Rationale: misaligned stores are not yet supported.
1036 TODO: Use a better cost model. */
1038 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1040 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1041 if (!aligned_access_p (dr))
1043 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
1044 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
1049 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
1051 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1052 fprintf (vect_dump, "Peeling for alignment will not be applied.");
1056 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
1057 fprintf (vect_dump, "Peeling for alignment will be applied.");
1060 /* (1.2) Update the alignment info according to the peeling factor.
1061 If the misalignment of the DR we peel for is M, then the
1062 peeling factor is VF - M, and the misalignment of each access DR_i
1063 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
1064 If the misalignment of the DR we peel for is unknown, then the
1065 misalignment of each access DR_i in the loop is also unknown.
1067 FORNOW: set the misalignment of the accesses to unknown even
1068 if the peeling factor is known at compile time.
1070 TODO: - if the peeling factor is known at compile time, use that
1071 when updating the misalignment info of the loop DRs.
1072 - consider accesses that are known to have the same
1073 alignment, even if that alignment is unknown. */
1075 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1077 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1078 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
1080 DR_MISALIGNMENT (dr) = 0;
1081 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
1082 fprintf (vect_dump, "Alignment of access forced using peeling.");
1085 DR_MISALIGNMENT (dr) = -1;
1087 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1089 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1090 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
1092 DR_MISALIGNMENT (dr) = 0;
1093 if (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo)))
1094 fprintf (vect_dump, "Alignment of access forced using peeling.");
1097 DR_MISALIGNMENT (dr) = -1;
1102 /* Function vect_analyze_data_refs_alignment
1104 Analyze the alignment of the data-references in the loop.
1105 FOR NOW: Until support for misaligned accesses is in place, only if all
1106 accesses are aligned can the loop be vectorized. This restriction will be
1110 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1112 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1113 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1114 enum dr_alignment_support supportable_dr_alignment;
1117 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1118 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1121 /* This pass may take place at function granularity instead of at loop
/* Step 1: compute the misalignment of every data reference in the loop.
   Failure here means some alignment could not even be calculated.  */
1124 if (!vect_compute_data_refs_alignment (loop_vinfo))
1126 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1127 LOOP_LOC (loop_vinfo)))
1129 "not vectorized: can't calculate alignment for data ref.");
1134 /* This pass will decide on using loop versioning and/or loop peeling in
1135 order to enhance the alignment of data references in the loop. */
1137 vect_enhance_data_refs_alignment (loop_vinfo);
1140 /* Finally, check that all the data references in the loop can be
1141 handled with respect to their alignment. */
/* Step 2: verify every read is supportable given its (possibly improved)
   alignment; an unsupportable unaligned load rejects the loop.  */
1143 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1145 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1146 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1147 if (!supportable_dr_alignment)
1149 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1150 LOOP_LOC (loop_vinfo)))
1151 fprintf (vect_dump, "not vectorized: unsupported unaligned load.");
1154 if (supportable_dr_alignment != dr_aligned
1155 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
1156 fprintf (vect_dump, "Vectorizing an unaligned access.");
/* Same check for the writes: an unsupportable unaligned store also
   rejects the loop.  */
1158 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1160 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1161 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1162 if (!supportable_dr_alignment)
1164 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1165 LOOP_LOC (loop_vinfo)))
1166 fprintf (vect_dump, "not vectorized: unsupported unaligned store.");
1169 if (supportable_dr_alignment != dr_aligned
1170 && (vect_print_dump_info (REPORT_ALIGNMENT, LOOP_LOC (loop_vinfo))))
1171 fprintf (vect_dump, "Vectorizing an unaligned access.");
1178 /* Function vect_analyze_data_ref_access.
1180 Analyze the access pattern of the data-reference DR. For now, a data access
1181 has to be consecutive to be considered vectorizable. */
1184 vect_analyze_data_ref_access (struct data_reference *dr)
1186 tree stmt = DR_STMT (dr);
1187 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1188 tree step = STMT_VINFO_VECT_STEP (stmt_info);
1189 tree scalar_type = TREE_TYPE (DR_REF (dr));
/* A consecutive access advances by exactly one scalar element per
   iteration, i.e. STEP == TYPE_SIZE_UNIT (scalar_type).  A missing or
   different step means the pattern is not (yet) vectorizable.  */
1191 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1193 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1194 fprintf (vect_dump, "not consecutive access");
1201 /* Function vect_analyze_data_ref_accesses.
1203 Analyze the access pattern of all the data references in the loop.
1205 FORNOW: the only access pattern that is considered vectorizable is a
1206 simple step 1 (consecutive) access.
1208 FORNOW: handle only arrays and pointer accesses. */
1211 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1214 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1215 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
1217 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1218 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
/* Check every write data-ref; a single non-consecutive access pattern
   rejects the whole loop.  */
1220 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1222 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1223 bool ok = vect_analyze_data_ref_access (dr);
1226 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1227 LOOP_LOC (loop_vinfo)))
1228 fprintf (vect_dump, "not vectorized: complicated access pattern.");
/* Same check, applied to the read data-refs.  */
1233 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1235 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1236 bool ok = vect_analyze_data_ref_access (dr);
1239 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1240 LOOP_LOC (loop_vinfo)))
1241 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1250 /* Function vect_analyze_pointer_ref_access.
1253 STMT - a stmt that contains a data-ref.
1254 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
1255 ACCESS_FN - the access function of MEMREF.
1258 If the data-ref access is vectorizable, return a data_reference structure
1259 that represents it (DR). Otherwise - return NULL.
1260 STEP - the stride of MEMREF in the loop.
1261 INIT - the initial condition of MEMREF in the loop.
1264 static struct data_reference *
1265 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read,
1266 tree access_fn, tree *ptr_init, tree *ptr_step)
1268 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1269 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1270 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1272 tree reftype, innertype;
1273 tree indx_access_fn;
1274 int loopnum = loop->num;
1275 struct data_reference *dr;
/* The pointer must evolve as a simple induction variable (affine in the
   loop), with decomposable INIT and STEP.  */
1277 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step))
1279 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1280 LOOP_LOC (loop_vinfo)))
1281 fprintf (vect_dump, "not vectorized: pointer access is not simple.");
/* The starting address must not vary across iterations of this loop.  */
1287 if (!expr_invariant_in_loop_p (loop, init))
1289 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1290 LOOP_LOC (loop_vinfo)))
1292 "not vectorized: initial condition is not loop invariant.");
/* Only compile-time-constant strides are handled.  */
1296 if (TREE_CODE (step) != INTEGER_CST)
1298 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1299 LOOP_LOC (loop_vinfo)))
1301 "not vectorized: non constant step for pointer access.");
/* Sanity-check that both the dereferenced operand and the initial
   condition really have pointer type.  */
1305 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
1306 if (TREE_CODE (reftype) != POINTER_TYPE)
1308 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1309 LOOP_LOC (loop_vinfo)))
1310 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
1314 reftype = TREE_TYPE (init);
1315 if (TREE_CODE (reftype) != POINTER_TYPE)
1317 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1318 LOOP_LOC (loop_vinfo)))
1319 fprintf (vect_dump, "not vectorized: unexpected pointer access form.");
1323 *ptr_step = fold_convert (ssizetype, step);
1324 innertype = TREE_TYPE (reftype);
1325 /* Check that STEP is a multiple of type size. */
1326 if (!integer_zerop (size_binop (TRUNC_MOD_EXPR, *ptr_step,
1327 fold_convert (ssizetype, TYPE_SIZE_UNIT (innertype)))))
1329 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1330 LOOP_LOC (loop_vinfo)))
1331 fprintf (vect_dump, "not vectorized: non consecutive access.");
/* Build a unit-stride index access function {0, +, 1} in this loop and
   use it to create the data_reference for MEMREF.  */
1336 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
1337 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1339 fprintf (vect_dump, "Access function of ptr indx: ");
1340 print_generic_expr (vect_dump, indx_access_fn, TDF_SLIM);
1342 dr = init_data_ref (stmt, memref, NULL_TREE, indx_access_fn, is_read);
1348 /* Function vect_get_memtag.
1350 The function returns the relevant variable for memory tag (for aliasing
1354 vect_get_memtag (tree memref, struct data_reference *dr)
/* Dispatch on the form of MEMREF to find the aliasing memory tag:
   for an SSA_NAME use its variable's type memory tag; otherwise fall
   back to the pointer operand of the data-ref.  */
1358 switch (TREE_CODE (memref))
1361 symbl = SSA_NAME_VAR (memref);
1362 tag = get_var_ann (symbl)->type_mem_tag;
1365 tree ptr = TREE_OPERAND (DR_REF (dr), 0);
1366 if (TREE_CODE (ptr) == SSA_NAME)
1367 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
/* NOTE(review): this case appears to strip one level (e.g. ADDR_EXPR)
   and return the underlying object — confirm against the elided case
   labels above.  */
1372 return TREE_OPERAND (memref, 0);
1380 /* Function vect_address_analysis
1382 Return the BASE of the address expression EXPR.
1383 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1386 EXPR - the address expression that is being analyzed
1387 STMT - the statement that contains EXPR or its original memory reference
1388 IS_READ - TRUE if STMT reads from EXPR, FALSE if writes to EXPR
1389 VECTYPE - the type that defines the alignment (i.e, we compute
1390 alignment relative to TYPE_ALIGN(VECTYPE))
1391 DR - data_reference struct for the original memory reference
1394 BASE (returned value) - the base of the data reference EXPR.
1395 INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
1396 MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if the
1397 computation is impossible
1398 STEP - evolution of EXPR in the loop
1399 BASE_ALIGNED - indicates if BASE is aligned
1401 If something unexpected is encountered (an unsupported form of data-ref),
1402 then NULL_TREE is returned.
1406 vect_address_analysis (tree expr, tree stmt, bool is_read, tree vectype,
1407 struct data_reference *dr, tree *offset, tree *misalign,
1408 tree *step, bool *base_aligned)
1410 tree oprnd0, oprnd1, base_address, offset_expr, base_addr0, base_addr1;
1411 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1413 switch (TREE_CODE (expr))
1417 /* EXPR is of form {base +/- offset} (or {offset +/- base}). */
1418 oprnd0 = TREE_OPERAND (expr, 0);
1419 oprnd1 = TREE_OPERAND (expr, 1);
1421 STRIP_NOPS (oprnd0);
1422 STRIP_NOPS (oprnd1);
1424 /* Recursively try to find the base of the address contained in EXPR.
1425 For offset, the returned base will be NULL. */
1426 base_addr0 = vect_address_analysis (oprnd0, stmt, is_read, vectype, dr,
1427 &address_offset, &address_misalign, step,
1430 base_addr1 = vect_address_analysis (oprnd1, stmt, is_read, vectype, dr,
1431 &address_offset, &address_misalign, step,
1434 /* We support cases where only one of the operands contains an
/* Exactly one operand must yield a base: both (or neither) providing a
   base address is an unsupported form.  */
1436 if ((base_addr0 && base_addr1) || (!base_addr0 && !base_addr1))
1439 /* To revert STRIP_NOPS. */
1440 oprnd0 = TREE_OPERAND (expr, 0);
1441 oprnd1 = TREE_OPERAND (expr, 1);
/* The non-base operand is the offset term; normalize it to ssizetype.  */
1443 offset_expr = base_addr0 ?
1444 fold_convert (ssizetype, oprnd1) : fold_convert (ssizetype, oprnd0);
1446 /* EXPR is of form {base +/- offset} (or {offset +/- base}). If offset is
1447 a number, we can add it to the misalignment value calculated for base,
1448 otherwise, misalignment is NULL. */
1449 if (TREE_CODE (offset_expr) == INTEGER_CST && address_misalign)
1450 *misalign = size_binop (TREE_CODE (expr), address_misalign,
1453 *misalign = NULL_TREE;
1455 /* Combine offset (from EXPR {base + offset}) with the offset calculated
/* TREE_CODE (expr) is PLUS_EXPR or MINUS_EXPR, so the combination uses
   the same +/- operation as EXPR itself.  */
1457 *offset = size_binop (TREE_CODE (expr), address_offset, offset_expr);
1458 return base_addr0 ? base_addr0 : base_addr1;
/* ADDR_EXPR-like case: analyze the addressed object instead.  */
1461 base_address = vect_object_analysis (TREE_OPERAND (expr, 0), stmt, is_read,
1462 vectype, &dr, offset, misalign, step,
1464 return base_address;
/* SSA_NAME-like case: a bare pointer.  Its alignment decides
   BASE_ALIGNED; non-pointer types are rejected.  */
1467 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1470 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1472 if (vect_get_ptr_offset (expr, vectype, misalign))
1473 *base_aligned = true;
1475 *base_aligned = false;
1479 *base_aligned = true;
1480 *misalign = ssize_int (0);
1482 *offset = ssize_int (0);
1483 *step = ssize_int (0);
1492 /* Function vect_object_analysis
1494 Return the BASE of the data reference MEMREF.
1495 Also compute the INITIAL_OFFSET from BASE, MISALIGN and STEP.
1496 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
1497 'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
1498 instantiated with initial_conditions of access_functions of variables,
1499 modulo alignment, and STEP is the evolution of the DR_REF in this loop.
1501 Function get_inner_reference is used for the above in case of ARRAY_REF and
1504 The structure of the function is as follows:
1506 Case 1. For handled_component_p refs
1507 1.1 call get_inner_reference
1508 1.1.1 analyze offset expr received from get_inner_reference
1509 1.2. build data-reference structure for MEMREF
1510 (fall through with BASE)
1511 Case 2. For declarations
1513 2.2 update DR_BASE_NAME if necessary for alias
1514 Case 3. For INDIRECT_REFs
1515 3.1 get the access function
1516 3.2 analyze evolution of MEMREF
1517 3.3 set data-reference structure for MEMREF
1518 3.4 call vect_address_analysis to analyze INIT of the access function
1521 Combine the results of object and address analysis to calculate
1522 INITIAL_OFFSET, STEP and misalignment info.
1525 MEMREF - the memory reference that is being analyzed
1526 STMT - the statement that contains MEMREF
1527 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
1528 VECTYPE - the type that defines the alignment (i.e, we compute
1529 alignment relative to TYPE_ALIGN(VECTYPE))
1532 BASE_ADDRESS (returned value) - the base address of the data reference MEMREF
1533 E.g, if MEMREF is a.b[k].c[i][j] the returned
1535 DR - data_reference struct for MEMREF
1536 INITIAL_OFFSET - initial offset of MEMREF from BASE (an expression)
1537 MISALIGN - offset of MEMREF from BASE in bytes (a constant) or NULL_TREE if
1538 the computation is impossible
1539 STEP - evolution of the DR_REF in the loop
1540 BASE_ALIGNED - indicates if BASE is aligned
1542 If something unexpected is encountered (an unsupported form of data-ref),
1543 then NULL_TREE is returned. */
1546 vect_object_analysis (tree memref, tree stmt, bool is_read,
1547 tree vectype, struct data_reference **dr,
1548 tree *offset, tree *misalign, tree *step,
/* Accumulators for the object part and the address part of the analysis;
   the two are combined at the end (Part 2).  */
1551 tree base = NULL_TREE, base_address = NULL_TREE;
1552 tree object_offset = ssize_int (0), object_misalign = ssize_int (0);
1553 tree object_step = ssize_int (0), address_step = ssize_int (0);
1554 bool object_base_aligned = true, address_base_aligned = true;
1555 tree address_offset = ssize_int (0), address_misalign = ssize_int (0);
1556 HOST_WIDE_INT pbitsize, pbitpos;
1557 tree poffset, bit_pos_in_bytes;
1558 enum machine_mode pmode;
1559 int punsignedp, pvolatilep;
1560 tree ptr_step = ssize_int (0), ptr_init = NULL_TREE;
1561 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1562 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1563 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1564 struct data_reference *ptr_dr = NULL;
1565 tree access_fn, evolution_part, address_to_analyze;
1568 /* Case 1. handled_component_p refs. */
1569 if (handled_component_p (memref))
1571 /* 1.1 call get_inner_reference. */
1572 /* Find the base and the offset from it. */
1573 base = get_inner_reference (memref, &pbitsize, &pbitpos, &poffset,
1574 &pmode, &punsignedp, &pvolatilep, false);
1578 /* 1.1.1 analyze offset expr received from get_inner_reference. */
1580 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1581 &object_offset, &object_misalign, &object_step))
1583 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1585 fprintf (vect_dump, "failed to compute offset or step for ");
1586 print_generic_expr (vect_dump, memref, TDF_SLIM);
1591 /* Add bit position to OFFSET and MISALIGN. */
1593 bit_pos_in_bytes = ssize_int (pbitpos/BITS_PER_UNIT);
1594 /* Check that there is no remainder in bits. */
1595 if (pbitpos%BITS_PER_UNIT)
1597 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1598 fprintf (vect_dump, "bit offset alignment.")
1601 object_offset = size_binop (PLUS_EXPR, bit_pos_in_bytes, object_offset);
1602 if (object_misalign)
1603 object_misalign = size_binop (PLUS_EXPR, object_misalign,
1606 /* Create data-reference for MEMREF. TODO: handle COMPONENT_REFs. */
1609 if (TREE_CODE (memref) == ARRAY_REF)
1610 *dr = analyze_array (stmt, memref, is_read);
1615 memref = base; /* To continue analysis of BASE. */
1619 /* Part 1: Case 2. Declarations. */
1620 if (DECL_P (memref))
1622 /* We expect to get a decl only if we already have a DR. */
1625 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1627 fprintf (vect_dump, "unhandled decl ");
1628 print_generic_expr (vect_dump, memref, TDF_SLIM);
1633 /* 2.1 check the alignment. */
1634 if (DECL_ALIGN (memref) >= TYPE_ALIGN (vectype))
1635 object_base_aligned = true;
1637 object_base_aligned = false;
1639 /* 2.2 update DR_BASE_NAME if necessary. */
1640 if (!DR_BASE_NAME ((*dr)))
1641 /* For alias analysis. In case the analysis of INDIRECT_REF brought
1643 DR_BASE_NAME ((*dr)) = memref;
1645 base_address = build_fold_addr_expr (memref);
1648 /* Part 1: Case 3. INDIRECT_REFs. */
1649 else if (TREE_CODE (memref) == INDIRECT_REF)
1651 /* 3.1 get the access function. */
1652 access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
1655 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1656 LOOP_LOC (loop_vinfo)))
1657 fprintf (vect_dump, "not vectorized: complicated pointer access.");
1660 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1662 fprintf (vect_dump, "Access function of ptr: ");
1663 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
1666 /* 3.2 analyze evolution of MEMREF. */
1667 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
/* The pointer evolves in the loop: analyze it as a pointer IV and fold
   its stride into the object step.  */
1670 ptr_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read,
1671 access_fn, &ptr_init, &ptr_step);
1675 object_step = size_binop (PLUS_EXPR, object_step, ptr_step);
1676 address_to_analyze = ptr_init;
1682 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1683 LOOP_LOC (loop_vinfo)))
1684 fprintf (vect_dump, "not vectorized: ptr is loop invariant.");
1687 /* Since there exists DR for MEMREF, we are analyzing the base of
1688 handled component, which not necessary has evolution in the
1690 address_to_analyze = TREE_OPERAND (base, 0);
1693 /* 3.3 set data-reference structure for MEMREF. */
1694 *dr = (*dr) ? *dr : ptr_dr;
1696 /* 3.4 call vect_address_analysis to analyze INIT of the access
1698 base_address = vect_address_analysis (address_to_analyze, stmt, is_read,
1699 vectype, *dr, &address_offset, &address_misalign,
1700 &address_step, &address_base_aligned);
1704 /* MEMREF cannot be analyzed. */
1707 /* Part 2: Combine the results of object and address analysis to calculate
1708 INITIAL_OFFSET, STEP and misalignment info. */
1709 *offset = size_binop (PLUS_EXPR, object_offset, address_offset);
/* Misalignment is only known if BOTH parts produced a constant.  */
1710 if (object_misalign && address_misalign)
1711 *misalign = size_binop (PLUS_EXPR, object_misalign, address_misalign);
1713 *misalign = NULL_TREE;
1714 *step = size_binop (PLUS_EXPR, object_step, address_step);
1715 *base_aligned = object_base_aligned && address_base_aligned;
1717 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1719 fprintf (vect_dump, "Results of object analysis for: ");
1720 print_generic_expr (vect_dump, memref, TDF_SLIM);
1721 fprintf (vect_dump, "\n\tbase: ");
1722 print_generic_expr (vect_dump, base, TDF_SLIM);
1723 fprintf (vect_dump, "\n\toffset: ");
1724 print_generic_expr (vect_dump, *offset, TDF_SLIM);
1725 fprintf (vect_dump, "\n\tstep: ");
1726 print_generic_expr (vect_dump, *step, TDF_SLIM);
1727 fprintf (vect_dump, "\n\tbase aligned %d\n\tmisalign: ", *base_aligned);
1728 print_generic_expr (vect_dump, *misalign, TDF_SLIM);
1730 return base_address;
1734 /* Function vect_analyze_data_refs.
1736 Find all the data references in the loop.
1738 The general structure of the analysis of data refs in the vectorizer is as
1740 1- vect_analyze_data_refs(loop):
1741 Find and analyze all data-refs in the loop:
1743 base_address = vect_object_analysis(ref)
1744 ref_stmt.memtag = vect_get_memtag(base)
1745 1.1- vect_object_analysis(ref):
1746 Analyze ref, and build a DR (data_referece struct) for it;
1747 compute base, initial_offset, step and alignment.
1748 Call get_inner_reference for refs handled in this function.
1749 Call vect_addr_analysis(addr) to analyze pointer type expressions.
1750 Set ref_stmt.base, ref_stmt.initial_offset, ref_stmt.alignment, and
1751 ref_stmt.step accordingly.
1752 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
1753 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1754 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1756 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
1757 which base is really an array (not a pointer) and which alignment
1758 can be forced. This restriction will be relaxed. */
1761 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1763 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1764 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1765 int nbbs = loop->num_nodes;
1766 block_stmt_iterator si;
1768 struct data_reference *dr;
1770 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1771 fprintf (vect_dump, "=== vect_analyze_data_refs ===");
/* Scan every statement of every basic block of the loop.  */
1773 for (j = 0; j < nbbs; j++)
1775 basic_block bb = bbs[j];
1776 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
1778 bool is_read = false;
1779 tree stmt = bsi_stmt (si);
1780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1781 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1782 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1783 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1784 varray_type *datarefs = NULL;
1785 int nvuses, nv_may_defs, nv_must_defs;
1787 tree scalar_type, vectype;
1788 tree base, offset, misalign, step, tag;
1791 /* Assumption: there exists a data-ref in stmt, if and only if
1792 it has vuses/vdefs. */
1794 if (!vuses && !v_may_defs && !v_must_defs)
1797 nvuses = NUM_VUSES (vuses);
1798 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1799 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A stmt with both vuses and vdefs (reads AND writes memory) is not
   an expected form here.  */
1801 if (nvuses && (nv_may_defs || nv_must_defs))
1803 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1805 fprintf (vect_dump, "unexpected vdefs and vuses in stmt: ");
1806 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1811 if (TREE_CODE (stmt) != MODIFY_EXPR)
1813 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1815 fprintf (vect_dump, "unexpected vops in stmt: ");
1816 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* A load: the data-ref is on the RHS of the MODIFY_EXPR.  */
1823 memref = TREE_OPERAND (stmt, 1);
1824 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
/* A store: the data-ref is on the LHS of the MODIFY_EXPR.  */
1829 memref = TREE_OPERAND (stmt, 0);
1830 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
1834 scalar_type = TREE_TYPE (memref);
1835 vectype = get_vectype_for_scalar_type (scalar_type);
1838 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1840 fprintf (vect_dump, "no vectype for stmt: ");
1841 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1842 fprintf (vect_dump, " scalar_type: ");
1843 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1845 /* It is not possible to vectorize this data reference. */
1848 /* Analyze MEMREF. If it is of a supported form, build data_reference
1849 struct for it (DR). */
1851 base = vect_object_analysis (memref, stmt, is_read, vectype, &dr,
1852 &offset, &misalign, &step,
1856 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1857 LOOP_LOC (loop_vinfo)))
1859 fprintf (vect_dump, "not vectorized: unhandled data ref: ");
1860 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1864 /* Find memtag for aliasing purposes. */
1865 tag = vect_get_memtag (base, dr);
1868 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
1869 LOOP_LOC (loop_vinfo)))
1871 fprintf (vect_dump, "not vectorized: no memtag ref: ");
1872 print_generic_expr (vect_dump, memref, TDF_SLIM);
/* Record all analysis results in the stmt's vectorization info and
   push the DR onto the loop's read or write data-ref array.  */
1876 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info) = base;
1877 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1878 STMT_VINFO_VECT_STEP (stmt_info) = step;
1879 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
1880 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned;
1881 STMT_VINFO_MEMTAG (stmt_info) = tag;
1882 STMT_VINFO_VECTYPE (stmt_info) = vectype;
1883 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
1884 STMT_VINFO_DATA_REF (stmt_info) = dr;
1892 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
1894 /* Function vect_mark_relevant.
1896 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
1899 vect_mark_relevant (varray_type *worklist, tree stmt)
1901 stmt_vec_info stmt_info;
1903 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1904 fprintf (vect_dump, "mark relevant.")
/* PHI nodes carry no stmt_vec_info here; just queue them.  */
1906 if (TREE_CODE (stmt) == PHI_NODE)
1908 VARRAY_PUSH_TREE (*worklist, stmt);
1912 stmt_info = vinfo_for_stmt (stmt);
1916 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1918 fprintf (vect_dump, "mark relevant: no stmt info!!.");
1919 print_generic_expr (vect_dump, stmt, TDF_SLIM);
/* Avoid re-queuing a stmt that is already marked relevant.  */
1924 if (STMT_VINFO_RELEVANT_P (stmt_info))
1926 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1927 fprintf (vect_dump, "already marked relevant.");
1931 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
1932 VARRAY_PUSH_TREE (*worklist, stmt);
1936 /* Function vect_stmt_relevant_p.
1938 Return true if STMT in loop that is represented by LOOP_VINFO is
1939 "relevant for vectorization".
1941 A stmt is considered "relevant for vectorization" if:
1942 - it has uses outside the loop.
1943 - it has vdefs (it alters memory).
1944 - control stmts in the loop (except for the exit condition).
1946 CHECKME: what other side effects would the vectorizer allow? */
1949 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
1951 v_may_def_optype v_may_defs;
1952 v_must_def_optype v_must_defs;
1953 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1958 /* cond stmt other than loop exit cond. */
1959 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
1962 /* changing memory. */
1963 if (TREE_CODE (stmt) != PHI_NODE)
1965 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
1966 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
1967 if (v_may_defs || v_must_defs)
1969 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1970 fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
1975 /* uses outside the loop. */
/* Walk the immediate uses of STMT; a use whose basic block lies outside
   the loop makes STMT relevant.  */
1976 df = get_immediate_uses (stmt);
1977 num_uses = num_immediate_uses (df);
1978 for (i = 0; i < num_uses; i++)
1980 tree use = immediate_use (df, i);
1981 basic_block bb = bb_for_stmt (use);
1982 if (!flow_bb_inside_loop_p (loop, bb))
1984 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1985 fprintf (vect_dump, "vec_stmt_relevant_p: used out of loop.");
1994 /* Function vect_mark_stmts_to_be_vectorized.
1996 Not all stmts in the loop need to be vectorized. For example:
2005 Stmt 1 and 3 do not need to be vectorized, because loop control and
2006 addressing of vectorized data-refs are handled differently.
2008 This pass detects such stmts. */
2011 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
2013 varray_type worklist;
2014 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2015 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2016 unsigned int nbbs = loop->num_nodes;
2017 block_stmt_iterator si;
2023 stmt_vec_info stmt_info;
2027 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2028 fprintf (vect_dump, "=== vect_mark_stmts_to_be_vectorized ===");
/* A loop-header PHI that is "relevant" implies a cross-iteration scalar
   cycle (reduction/induction), which is not supported here.  */
2031 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2033 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2035 fprintf (vect_dump, "init: phi relevant? ");
2036 print_generic_expr (vect_dump, phi, TDF_SLIM);
2039 if (vect_stmt_relevant_p (phi, loop_vinfo))
2041 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2042 LOOP_LOC (loop_vinfo)))
2043 fprintf (vect_dump, "unsupported reduction/induction.");
2048 VARRAY_TREE_INIT (worklist, 64, "work list");
2050 /* 1. Init worklist. */
2052 for (i = 0; i < nbbs; i++)
2055 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2057 stmt = bsi_stmt (si);
2059 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2061 fprintf (vect_dump, "init: stmt relevant? ");
2062 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2065 stmt_info = vinfo_for_stmt (stmt);
2066 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
2068 if (vect_stmt_relevant_p (stmt, loop_vinfo))
2069 vect_mark_relevant (&worklist, stmt);
2074 /* 2. Process_worklist */
/* Fixed-point iteration: pop a relevant stmt, mark the in-loop defining
   stmts of its uses as relevant, until the worklist drains.  */
2076 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
2078 stmt = VARRAY_TOP_TREE (worklist);
2079 VARRAY_POP (worklist);
2081 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2083 fprintf (vect_dump, "worklist: examine stmt: ");
2084 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2087 /* Examine the USES in this statement. Mark all the statements which
2088 feed this statement's uses as "relevant", unless the USE is used as
2091 if (TREE_CODE (stmt) == PHI_NODE)
2093 /* follow the def-use chain inside the loop. */
2094 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
2096 tree arg = PHI_ARG_DEF (stmt, j);
2097 tree def_stmt = NULL_TREE;
2099 if (!vect_is_simple_use (arg, loop_vinfo, &def_stmt))
2101 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2102 LOOP_LOC (loop_vinfo)))
2103 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2104 varray_clear (worklist);
2110 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2112 fprintf (vect_dump, "worklist: def_stmt: ");
2113 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
/* Only defs inside the loop propagate relevance.  */
2116 bb = bb_for_stmt (def_stmt);
2117 if (flow_bb_inside_loop_p (loop, bb))
2118 vect_mark_relevant (&worklist, def_stmt);
2122 ann = stmt_ann (stmt);
2123 use_ops = USE_OPS (ann);
2125 for (i = 0; i < NUM_USES (use_ops); i++)
2127 tree use = USE_OP (use_ops, i);
2129 /* We are only interested in uses that need to be vectorized. Uses
2130 that are used for address computation are not considered relevant.
2132 if (exist_non_indexing_operands_for_use_p (use, stmt))
2134 tree def_stmt = NULL_TREE;
2136 if (!vect_is_simple_use (use, loop_vinfo, &def_stmt))
2138 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS,
2139 LOOP_LOC (loop_vinfo)))
2140 fprintf (vect_dump, "not vectorized: unsupported use in stmt.");
2141 varray_clear (worklist);
2148 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2150 fprintf (vect_dump, "worklist: examine use %d: ", i);
2151 print_generic_expr (vect_dump, use, TDF_SLIM);
2154 bb = bb_for_stmt (def_stmt);
2155 if (flow_bb_inside_loop_p (loop, bb))
2156 vect_mark_relevant (&worklist, def_stmt);
2159 } /* while worklist */
2161 varray_clear (worklist);
2166 /* Function vect_can_advance_ivs_p
2168 In case the number of iterations that LOOP iterates in unknown at compile
2169 time, an epilog loop will be generated, and the loop induction variables
2170 (IVs) will be "advanced" to the value they are supposed to take just before
2171 the epilog loop. Here we check that the access function of the loop IVs
2172 and the expression that represents the loop bound are simple enough.
2173 These restrictions will be relaxed in the future. */
2176 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
2178 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2179 basic_block bb = loop->header;
2182 /* Analyze phi functions of the loop header. */
2184 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
2186 tree access_fn = NULL;
2187 tree evolution_part;
2189 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2191 fprintf (vect_dump, "Analyze phi: ");
2192 print_generic_expr (vect_dump, phi, TDF_SLIM);
2195 /* Skip virtual phi's. The data dependences that are associated with
2196 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2198 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2200 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2201 fprintf (vect_dump, "virtual phi. skip.");
2205 /* Analyze the evolution function. */
2207 access_fn = instantiate_parameters
2208 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));
2212 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2213 fprintf (vect_dump, "No Access function.");
2217 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2219 fprintf (vect_dump, "Access function of PHI: ");
2220 print_generic_expr (vect_dump, access_fn, TDF_SLIM);
/* The evolution part in this loop must exist, and must not itself be a
   chrec (i.e. no polynomial IVs of degree >= 2).  */
2223 evolution_part = evolution_part_in_loop_num (access_fn, loop->num);
2225 if (evolution_part == NULL_TREE)
2228 /* FORNOW: We do not transform initial conditions of IVs
2229 which evolution functions are a polynomial of degree >= 2. */
2231 if (tree_is_chrec (evolution_part))
2239 /* Function vect_get_loop_niters.
2241 Determine how many iterations the loop is executed.
2242 If an expression that represents the number of iterations
2243 can be constructed, place it in NUMBER_OF_ITERATIONS.
2244 Return the loop exit condition. */
2247 vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
2251 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2252 fprintf (vect_dump, "=== get_loop_niters ===");
2254 niters = number_of_iterations_in_loop (loop);
/* Only record the iteration count when the analysis produced a usable
   expression (neither NULL nor chrec_dont_know).  */
2256 if (niters != NULL_TREE
2257 && niters != chrec_dont_know)
2259 *number_of_iterations = niters;
2261 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2263 fprintf (vect_dump, "==> get_loop_niters:" );
2264 print_generic_expr (vect_dump, *number_of_iterations, TDF_SLIM);
/* The exit condition is returned regardless of whether the iteration
   count was computable.  */
2268 return get_loop_exit_condition (loop);
2272 /* Function vect_analyze_loop_form.
2274 Verify the following restrictions (some may be relaxed in the future):
2275 - it's an inner-most loop
2276 - number of BBs = 2 (which are the loop header and the latch)
2277 - the loop has a pre-header
2278 - the loop has a single entry and exit
2279 - the loop exit condition is simple enough, and the number of iterations
2280 can be analyzed (a countable loop). */
/* NOTE(review): this extract is lossy -- braces, the early "return NULL"
   statements after each rejection, and some declarations (e.g. loop_loc,
   loop_cond) are missing.  Comments below annotate the visible statements
   only; the control flow between them must be confirmed against the full
   file.  */
2282 static loop_vec_info
2283 vect_analyze_loop_form (struct loop *loop)
2285 loop_vec_info loop_vinfo;
2287 tree number_of_iterations = NULL;
/* Set when the CFG is modified below and loop info must be rescanned --
   TODO confirm its use against the full file; the reading site is not
   visible in this extract.  */
2288 bool rescan = false;
/* Source location of the loop, used to tag all dump messages.  */
2291 loop_loc = find_loop_location (loop);
2293 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2294 fprintf (vect_dump, "=== vect_analyze_loop_form ===");
/* Presumably guarded by an inner-loop test not visible in this extract:
   loops that are not inner-most are rejected.  */
2298 if (vect_print_dump_info (REPORT_OUTER_LOOPS, loop_loc))
2299 fprintf (vect_dump, "not vectorized: nested loop.");
/* Structural checks: single exit, exactly two basic blocks
   (header + latch), two header predecessors (preheader + latch),
   and a single entry.  */
2303 if (!loop->single_exit
2304 || loop->num_nodes != 2
2305 || EDGE_COUNT (loop->header->preds) != 2
2306 || loop->num_entries != 1)
/* Report the first structural check that failed, mirroring the
   condition order above.  */
2308 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2310 if (!loop->single_exit)
2311 fprintf (vect_dump, "not vectorized: multiple exits.");
2312 else if (loop->num_nodes != 2)
2313 fprintf (vect_dump, "not vectorized: too many BBs in loop.");
2314 else if (EDGE_COUNT (loop->header->preds) != 2)
2315 fprintf (vect_dump, "not vectorized: too many incoming edges.");
2316 else if (loop->num_entries != 1)
2317 fprintf (vect_dump, "not vectorized: too many entries.");
2323 /* We assume that the loop exit condition is at the end of the loop, i.e.,
2324 that the loop is represented as a do-while (with a proper if-guard
2325 before the loop if needed), where the loop header contains all the
2326 executable statements, and the latch is empty. */
2327 if (!empty_block_p (loop->latch))
2329 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
/* NOTE(review): "unexpectd" is a typo in this runtime message text;
   fix it in a separate change (a doc-only pass must not touch
   string literals).  */
2330 fprintf (vect_dump, "not vectorized: unexpectd loop form.");
2334 /* Make sure we have a preheader basic block. */
2335 if (!loop->pre_header)
/* Create the missing preheader by splitting the preheader edge.  */
2338 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2341 /* Make sure there exists a single-predecessor exit bb: */
2342 if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
/* Split the exit edge so the exit block has a single predecessor.  */
2345 loop_split_edge_with (loop->exit_edges[0], NULL);
/* Recompute the loop's CFG-derived fields after the edge splitting
   above may have changed the CFG.  */
2350 flow_loop_scan (loop, LOOP_ALL);
2351 /* Flow loop scan does not update loop->single_exit field. */
2352 loop->single_exit = loop->exit_edges[0];
/* An empty header means there is nothing to vectorize.  */
2355 if (empty_block_p (loop->header))
2357 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2358 fprintf (vect_dump, "not vectorized: empty loop.");
/* Analyze the exit condition and try to compute the iteration count.  */
2362 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
/* Presumably reached when loop_cond is NULL (guard not visible in
   this extract): the exit condition could not be analyzed.  */
2365 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2366 fprintf (vect_dump, "not vectorized: complicated exit condition.");
2370 if (!number_of_iterations)
2372 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2374 "not vectorized: number of iterations cannot be computed.");
/* An undetermined chrec inside the count means the loop may be
   infinite or otherwise unanalyzable.  */
2378 if (chrec_contains_undetermined (number_of_iterations))
2380 if (vect_print_dump_info (REPORT_BAD_FORM_LOOPS, loop_loc))
2381 fprintf (vect_dump, "Infinite number of iterations.");
/* All form checks passed: create the per-loop vectorization info
   and record the iteration count in it.  */
2385 loop_vinfo = new_loop_vec_info (loop);
2386 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
2388 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2390 if (vect_print_dump_info (REPORT_DETAILS, loop_loc))
2392 fprintf (vect_dump, "Symbolic number of iterations is ");
2393 print_generic_expr (vect_dump, number_of_iterations, TDF_DETAILS)
/* A compile-time iteration count of zero: the loop never runs, so
   there is nothing to vectorize.  */
2397 if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
2399 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS, loop_loc))
2400 fprintf (vect_dump, "not vectorized: number of iterations = 0.");
/* Record the exit condition and source location for later phases.  */
2404 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
2405 LOOP_VINFO_LOC (loop_vinfo) = loop_loc;
2411 /* Function vect_analyze_loop.
2413 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2414 for it. The different analyses will record information in the
2415 loop_vec_info struct. */
2417 vect_analyze_loop (struct loop *loop)
2420 loop_vec_info loop_vinfo;
2422 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2423 fprintf (vect_dump, "===== analyze_loop_nest =====");
2425 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
2427 loop_vinfo = vect_analyze_loop_form (loop);
2430 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
2431 fprintf (vect_dump, "bad loop form.");
2435 /* Find all data references in the loop (which correspond to vdefs/vuses)
2436 and analyze their evolution in the loop.
2438 FORNOW: Handle only simple, array references, which
2439 alignment can be forced, and aligned pointer-references. */
2441 ok = vect_analyze_data_refs (loop_vinfo);
2444 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2445 fprintf (vect_dump, "bad data references.");
2446 destroy_loop_vec_info (loop_vinfo);
2450 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
2452 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
2455 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2456 fprintf (vect_dump, "unexpected pattern.");
2457 destroy_loop_vec_info (loop_vinfo);
2461 /* Check that all cross-iteration scalar data-flow cycles are OK.
2462 Cross-iteration cycles caused by virtual phis are analyzed separately. */
2464 ok = vect_analyze_scalar_cycles (loop_vinfo);
2467 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2468 fprintf (vect_dump, "bad scalar cycle.");
2469 destroy_loop_vec_info (loop_vinfo);
2473 /* Analyze data dependences between the data-refs in the loop.
2474 FORNOW: fail at the first data dependence that we encounter. */
2476 ok = vect_analyze_data_ref_dependences (loop_vinfo);
2479 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2480 fprintf (vect_dump, "bad data dependence.");
2481 destroy_loop_vec_info (loop_vinfo);
2485 /* Analyze the access patterns of the data-refs in the loop (consecutive,
2486 complex, etc.). FORNOW: Only handle consecutive access pattern. */
2488 ok = vect_analyze_data_ref_accesses (loop_vinfo);
2491 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2492 fprintf (vect_dump, "bad data access.");
2493 destroy_loop_vec_info (loop_vinfo);
2497 /* Analyze the alignment of the data-refs in the loop.
2498 FORNOW: Only aligned accesses are handled. */
2500 ok = vect_analyze_data_refs_alignment (loop_vinfo);
2503 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2504 fprintf (vect_dump, "bad data alignment.");
2505 destroy_loop_vec_info (loop_vinfo);
2509 /* Scan all the operations in the loop and make sure they are
2512 ok = vect_analyze_operations (loop_vinfo);
2515 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
2516 fprintf (vect_dump, "bad operation or unsupported loop bound.");
2517 destroy_loop_vec_info (loop_vinfo);
2521 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;