/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}

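/* For illustration (a hypothetical example, not taken from real input):
   given the widening statement

       int_prod = (int) short_a * (int) short_b;

   the result type is 'int' (LHS_SIZE_UNIT = 4), but the smallest scalar
   type visited is 'short' (RHS_SIZE_UNIT = 2), so 'short' is returned
   and, with 16-byte vectors, the VF is driven by 16/2 = 8 elements
   rather than 16/4 = 4.  */
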
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
    }

  return next_stmt ? result : -1;
}

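/* For illustration (hypothetical accesses): in the chain built for the
   interleaved group

       a[4*i] -> a[4*i+1] -> a[4*i+2] -> a[4*i+3]

   calling this function on the stmt accessing a[4*i+2], with FIRST_STMT
   being the stmt accessing a[4*i], returns place 2; a stmt outside the
   chain returns -1.  */
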
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}

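/* For illustration (hypothetical inits): if DRB's chain holds data-refs
   with DR_INIT values 0 -> 4 -> 12, inserting a DRA whose DR_INIT is 8
   walks past 0 and 4, stops at 12 (the first init greater than 8), and
   links DRA between them, giving 0 -> 4 -> 8 -> 12.  */
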
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT(DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                              old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}

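/* For illustration (hypothetical inits): merging a chain with inits
   8 -> 24 (DRA's chain) into a chain with inits 0 -> 16 (DRB's chain,
   whose FIRST_DR has the smaller init) proceeds node by node without
   restarting: 8 is inserted after 0, then 24 is inserted after 16,
   yielding 0 -> 8 -> 16 -> 24, with every node pointing to DRB's first
   stmt.  */
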
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                            TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                            TREE_OPERAND (offset2, 1));

  return res;
}

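/* For illustration (hypothetical offsets): 'i * 4' and '(sizetype) i * 4'
   compare equal here - STRIP_NOPS removes the conversion, the tree codes
   (MULT_EXPR) match, and both operand pairs compare equal recursively -
   while 'i * 4' and 'i + 4' fail the TREE_CODE check.  */
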
/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share same bases and offsets, we can compare their initial
   constant offsets to decide whether they differ or not.  In case of a read-
   write dependence we check that the load is before the store to ensure that
   vectorization will not change the order of the accesses.  */

static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}

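/* For illustration (a hypothetical basic block):

       x = a[0];    <- load
       a[0] = y;    <- store

   has init_a == init_b, and the earlier stmt is the read, so false
   (no blocking dependence) is returned; with the store first, true
   would be returned and vectorization of the block abandoned.  */
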
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference between
        data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}

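/* For illustration (hypothetical accesses): for 'a[2*i]' and 'a[2*i+1]'
   with 4-byte elements, both steps are 8, init_b - init_a = 4, which is
   less than the step and divisible by the type size, so the two
   data-refs are chained as an interleaved pair; for 'a[2*i]' and
   'a[2*i+9]' the difference 36 exceeds the step 8 and no chain is
   built.  */
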
/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if the drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When the dependence can instead be checked
   at run-time by versioning for alias, return FALSE.  Adjust *MAX_VF
   according to the data dependence.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf,
                                  bool *data_dependence_in_bb)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                                  "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block, an unknown dependence can still mean
         strided access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* We deal with read-write dependencies in basic blocks later (by
         verifying that all the loads in the basic block are before all the
         stores).  */
      *data_dependence_in_bb = true;
      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is inapplicable, hence, in case of known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We checked before that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                              "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}

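/* For illustration (hypothetical distances): with a vectorization factor
   of 4, a dependence such as 'a[i+1] = a[i]' has distance 1 and blocks
   vectorization; 'a[i+2] = a[i]' (distance 2) lowers *max_vf to 2; and
   'a[i+8] = a[i]' (distance 8 >= VF) is harmless, since the accesses of
   one vector iteration never overlap it.  */
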
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   bb_vec_info bb_vinfo, int *max_vf,
                                   bool *data_dependence_in_bb)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = NULL;
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (loop_vinfo)
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  else
    ddrs = BB_VINFO_DDRS (bb_vinfo);

  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
                                          data_dependence_in_bb))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is a
     multiple of the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                      TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}

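/* For illustration (hypothetical numbers): for a 16-byte vector type and
   a data-ref with a base known to be 16-byte aligned, DR_INIT = 20 and a
   forward step, the computation above yields misalign = 20 mod 16 = 4
   bytes; with an unknown base alignment the function instead leaves
   DR_MISALIGNMENT at -1.  */
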
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}

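/* For illustration (hypothetical numbers): peeling npeel = 3 iterations
   for a forward data-ref of 4-byte elements with DR_MISALIGNMENT = 4 and
   a 16-byte vector type gives misal = 4 + 3*4 = 16, masked with 15 to 0:
   the access becomes aligned in the vector loop.  */
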
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");

              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }

  return true;
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
        is_packed = true;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}

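/* For illustration (hypothetical numbers): a 4-element interleaved group
   accessed through a vector of 8 elements with a misalignment of 2
   elements needs a prolog of 8 - 2 = 6 iterations, which is not a
   multiple of the group size 4, so alignment is not reachable by
   peeling; a misalignment of 4 elements would need 4 iterations and
   would be accepted.  */
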
/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  if (!supportable_dr_alignment)
    *inside_cost = VECT_MAX_COST;
  else
    {
      if (DR_IS_READ (dr))
        vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
      else
        vect_get_store_cost (dr, ncopies, inside_cost);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
             "outside_cost = %d.", *inside_cost, *outside_cost);
}

/* Hash a peeling hash table entry by its number of peeled iterations.  */

static hashval_t
vect_peeling_hash (const void *elem)
{
  const struct _vect_peel_info *peel_info;

  peel_info = (const struct _vect_peel_info *) elem;
  return (hashval_t) peel_info->npeel;
}

/* Equality callback for the peeling hash table: two entries are equal
   iff they peel the same number of iterations.  */

static int
vect_peeling_hash_eq (const void *elem1, const void *elem2)
{
  const struct _vect_peel_info *a, *b;

  a = (const struct _vect_peel_info *) elem1;
  b = (const struct _vect_peel_info *) elem2;
  return (a->npeel == b->npeel);
}

/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  void **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                                     &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
                                 INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}

/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

static int
vect_peeling_hash_get_most_frequent (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info max = (vect_peel_extended_info) data;

  if (elem->count > max->peel_info.count)
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}

/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
                         vect_get_single_scalar_iteration_cost (loop_vinfo));

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }

  return 1;
}

/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;

  if (flag_vect_cost_model)
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_lowest_cost, &res);
    }
  else
    {
      res.peel_info.count = 0;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_most_frequent, &res);
    }

  *npeel = res.peel_info.npeel;
  return res.peel_info.dr;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){    # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){    # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
        for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
          x = q[i];
          p[i] = y;
        }
        for (i = 3; i < N; i++){        # loop 2A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 3: combination of loop peeling and versioning:
        for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
          x = q[i];
          p[i] = y;
        }
        if (p is aligned) {
          for (i = 3; i<N; i++){        # loop 3A
            x = q[i];                   # DR_MISALIGNMENT(q) = 0
            p[i] = y;                   # DR_MISALIGNMENT(p) = 0
          }
        }
        else {
          for (i = 3; i<N; i++){        # loop 3B
            x = q[i];                   # DR_MISALIGNMENT(q) = 0
            p[i] = y;                   # DR_MISALIGNMENT(p) = unaligned
          }
        }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */

bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
                           htab_create (1, vect_peeling_hash,
                                        vect_peeling_hash_eq, free);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis))
                  & (nelements - 1);

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  Vectorization factor will be 8.  If
                 both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know all the misalignment values, we prefer
                 peeling for data-ref that has maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over loads.  */
              if (all_misalignments_unknown)
                {
                  if (same_align_drs_max < VEC_length (dr_p,
                                      STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
                      || !dr0)
                    {
                      same_align_drs_max = VEC_length (dr_p,
                                      STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
                      dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose peeling amount according to the known
                 accesses.  */

              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector alignment may not be reachable");

              break;
            }
        }
    }

  vect_versioning_for_alias_required
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {

      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;

          vect_get_data_access_cost (dr0, &load_inside_cost,
                                     &load_outside_cost);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost);

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (first_store))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (dr0))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments, use
         peeling only if it may help to align other accesses in the loop.  */
      if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                            (vinfo_for_stmt (DR_STMT (dr0))))
          && vect_supportable_dr_alignment (dr0, false)
             != dr_unaligned_supported)
        do_peeling = false;
    }

  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not supported
         unless aligned.  So we try to choose the best possible peeling.  */

      /* We should get here only if there are drs with known misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
      if (!dr0 || !npeel)
        do_peeling = false;
    }

  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = ((negative ? mis - nelements : nelements - mis)
                       & (nelements - 1));
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            return stat;
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);

          if (!supportable_dr_alignment)
            {
              int mask;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (gimple,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (gimple, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(gimple,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
  return stat;
}

1929 /* Function vect_find_same_alignment_drs.
1931 Update group and alignment relations according to the chosen
1932 vectorization factor. */
1935 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1936 loop_vec_info loop_vinfo)
1939 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1940 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1941 struct data_reference *dra = DDR_A (ddr);
1942 struct data_reference *drb = DDR_B (ddr);
1943 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1944 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1945 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1946 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1947 lambda_vector dist_v;
1948 unsigned int loop_depth;
1950 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1956 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1959 /* Loop-based vectorization and known data dependence. */
1960 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1963 /* Data-dependence analysis reports a distance vector of zero
1964 for data-references that overlap only in the first iteration
1965 but whose steps differ in sign (see PR45764).
1966 So as a sanity check require equal DR_STEP. */
1967 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1970 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1971 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1973 int dist = dist_v[loop_depth];
1975 if (vect_print_dump_info (REPORT_DR_DETAILS))
1976 fprintf (vect_dump, "dependence distance = %d.", dist);
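/* Worked example (an illustration): with a vectorization factor of 4
   and two int references a[i] and a[i+4], the dependence distance is 4;
   4 % 4 == 0 and the access sizes match, so the two references always
   fall at the same offset from a vector boundary and thus share their
   alignment.  */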
1978 /* Same loop iteration. */
1979 if (dist == 0
1980 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1982 /* Two references with distance zero have the same alignment. */
1983 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1984 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1985 if (vect_print_dump_info (REPORT_ALIGNMENT))
1986 fprintf (vect_dump, "accesses have the same alignment.");
1987 if (vect_print_dump_info (REPORT_DR_DETAILS))
1989 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1990 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1991 fprintf (vect_dump, " and ");
1992 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1999 /* Function vect_analyze_data_refs_alignment
2001 Analyze the alignment of the data-references in the loop.
2002 Return FALSE if a data reference is found that cannot be vectorized. */
2005 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2006 bb_vec_info bb_vinfo)
2008 if (vect_print_dump_info (REPORT_DETAILS))
2009 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2011 /* Mark groups of data references with the same alignment using
2012 data dependence information. */
2015 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2016 struct data_dependence_relation *ddr;
2019 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2020 vect_find_same_alignment_drs (ddr, loop_vinfo);
2023 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2025 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2027 "not vectorized: can't calculate alignment for data ref.");
2035 /* Analyze groups of strided accesses: check that DR belongs to a group of
2036 strided accesses of legal size, step, etc. Detect gaps, single element
2037 interleaving, and other special cases. Set strided access info.
2038 Collect groups of strided stores for further use in SLP analysis. */
2041 vect_analyze_group_access (struct data_reference *dr)
2043 tree step = DR_STEP (dr);
2044 tree scalar_type = TREE_TYPE (DR_REF (dr));
2045 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2046 gimple stmt = DR_STMT (dr);
2047 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2048 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2049 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2050 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2051 HOST_WIDE_INT stride, last_accessed_element = 1;
2052 bool slp_impossible = false;
2053 struct loop *loop = NULL;
2056 loop = LOOP_VINFO_LOOP (loop_vinfo);
2058 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2059 interleaving group (including gaps). */
2060 stride = dr_step / type_size;
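/* For example (an illustration): int accesses with DR_STEP 16 give
   stride = 16 / 4 = 4, i.e. an interleaving group that spans four
   elements (gaps included) per scalar iteration.  */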
2062 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2063 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2065 /* Check if this DR is a part of interleaving, and is a single
2066 element of the group that is accessed in the loop. */
2068 /* Gaps are supported only for loads. STEP must be a multiple of the type
2069 size. The size of the group must be a power of 2. */
2071 && (dr_step % type_size) == 0
2073 && exact_log2 (stride) != -1)
2075 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2076 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2077 if (vect_print_dump_info (REPORT_DR_DETAILS))
2079 fprintf (vect_dump, "Detected single element interleaving ");
2080 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2081 fprintf (vect_dump, " step ");
2082 print_generic_expr (vect_dump, step, TDF_SLIM);
2087 if (vect_print_dump_info (REPORT_DETAILS))
2088 fprintf (vect_dump, "Data access with gaps requires scalar "
2089 "epilogue loop");
2092 if (vect_print_dump_info (REPORT_DETAILS))
2093 fprintf (vect_dump, "Peeling for outer loop is not"
2094 " supported");
2098 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2104 if (vect_print_dump_info (REPORT_DETAILS))
2106 fprintf (vect_dump, "not consecutive access ");
2107 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2112 /* Mark the statement as unvectorizable. */
2113 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2120 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2122 /* First stmt in the interleaving chain. Check the chain. */
2123 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2124 struct data_reference *data_ref = dr;
2125 unsigned int count = 1;
2127 tree prev_init = DR_INIT (data_ref);
2129 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2133 /* Skip same data-refs. In case two or more stmts share a
2134 data-ref (supported only for loads), we vectorize only the first
2135 stmt, and the rest get their vectorized loads from the first
2136 one. */
2137 if (!tree_int_cst_compare (DR_INIT (data_ref),
2138 DR_INIT (STMT_VINFO_DATA_REF (
2139 vinfo_for_stmt (next)))))
2141 if (DR_IS_WRITE (data_ref))
2143 if (vect_print_dump_info (REPORT_DETAILS))
2144 fprintf (vect_dump, "Two store stmts share the same dr.");
2148 /* Check that there are no load-store dependences for these loads,
2149 to prevent a load-store-load sequence to the same location. */
2150 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2151 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2153 if (vect_print_dump_info (REPORT_DETAILS))
2155 "READ_WRITE dependence in interleaving.");
2159 /* For load use the same data-ref load. */
2160 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2163 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2169 /* Check that all the accesses have the same STEP. */
2170 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2171 if (tree_int_cst_compare (step, next_step))
2173 if (vect_print_dump_info (REPORT_DETAILS))
2174 fprintf (vect_dump, "not consecutive access in interleaving");
2178 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2179 /* Check that the distance between two accesses is equal to the type
2180 size. Otherwise, we have gaps. */
2181 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2182 - TREE_INT_CST_LOW (prev_init)) / type_size;
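/* Worked example (an illustration): for int members with DR_INITs
   0, 4 and 12, the diffs are (4 - 0) / 4 = 1 and (12 - 4) / 4 = 2,
   so the second pair leaves a one-element gap in the group.  */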
2185 /* FORNOW: SLP of accesses with gaps is not supported. */
2186 slp_impossible = true;
2187 if (DR_IS_WRITE (data_ref))
2189 if (vect_print_dump_info (REPORT_DETAILS))
2190 fprintf (vect_dump, "interleaved store with gaps");
2197 last_accessed_element += diff;
2199 /* Store the gap from the previous member of the group. If there is no
2200 gap in the access, DR_GROUP_GAP is always 1. */
2201 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2203 prev_init = DR_INIT (data_ref);
2204 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2205 /* Count the number of data-refs in the chain. */
2209 /* COUNT is the number of accesses found; we multiply it by the size of
2210 the type to get COUNT_IN_BYTES. */
2211 count_in_bytes = type_size * count;
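/* For example (an illustration): a group of three int stores a[4i],
   a[4i+1], a[4i+2] gives count_in_bytes = 4 * 3 = 12 while dr_step is
   16, so the second check below (dr_step != count_in_bytes) rejects it
   as an interleaved store with gaps.  */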
2213 /* Check that the size of the interleaving (including gaps) is not
2214 greater than STEP. */
2215 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2217 if (vect_print_dump_info (REPORT_DETAILS))
2219 fprintf (vect_dump, "interleaving size is greater than step for ");
2220 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2225 /* Check that the size of the interleaving is equal to STEP for stores,
2226 i.e., that there are no gaps. */
2227 if (dr_step && dr_step != count_in_bytes)
2229 if (DR_IS_READ (dr))
2231 slp_impossible = true;
2232 /* There is a gap after the last load in the group. This gap is the
2233 difference between the stride and the number of elements. When
2234 there is no gap, this difference should be 0. */
2235 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2239 if (vect_print_dump_info (REPORT_DETAILS))
2240 fprintf (vect_dump, "interleaved store with gaps");
2245 /* Check that STEP is a multiple of type size. */
2246 if (dr_step && (dr_step % type_size) != 0)
2248 if (vect_print_dump_info (REPORT_DETAILS))
2250 fprintf (vect_dump, "step is not a multiple of type size: step ");
2251 print_generic_expr (vect_dump, step, TDF_SLIM);
2252 fprintf (vect_dump, " size ");
2253 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2259 /* FORNOW: we handle only interleaving that is a power of 2.
2261 We don't fail here if it may still be possible to vectorize the
2261 group using SLP. If not, the size of the group will be checked in
2262 vect_analyze_operations, and the vectorization will fail. */
2263 if (exact_log2 (stride) == -1)
2265 if (vect_print_dump_info (REPORT_DETAILS))
2266 fprintf (vect_dump, "interleaving is not a power of 2");
2275 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2276 if (vect_print_dump_info (REPORT_DETAILS))
2277 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2279 /* SLP: create an SLP data structure for every interleaving group of
2280 stores for further analysis in vect_analyze_slp. */
2281 if (DR_IS_WRITE (dr) && !slp_impossible)
2284 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2287 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2291 /* There is a gap at the end of the group. */
2292 if (stride - last_accessed_element > 0 && loop_vinfo)
2294 if (vect_print_dump_info (REPORT_DETAILS))
2295 fprintf (vect_dump, "Data access with gaps requires scalar "
2296 "epilogue loop");
2299 if (vect_print_dump_info (REPORT_DETAILS))
2300 fprintf (vect_dump, "Peeling for outer loop is not supported");
2304 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2312 /* Analyze the access pattern of the data-reference DR.
2313 In case of non-consecutive accesses call vect_analyze_group_access() to
2314 analyze groups of strided accesses. */
2317 vect_analyze_data_ref_access (struct data_reference *dr)
2319 tree step = DR_STEP (dr);
2320 tree scalar_type = TREE_TYPE (DR_REF (dr));
2321 gimple stmt = DR_STMT (dr);
2322 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2323 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2324 struct loop *loop = NULL;
2325 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2328 loop = LOOP_VINFO_LOOP (loop_vinfo);
2330 if (loop_vinfo && !step)
2332 if (vect_print_dump_info (REPORT_DETAILS))
2333 fprintf (vect_dump, "bad data-ref access in loop");
2337 /* Don't allow invariant accesses in loops. */
2338 if (loop_vinfo && dr_step == 0)
2341 if (loop && nested_in_vect_loop_p (loop, stmt))
2343 /* Interleaved accesses are not yet supported within outer-loop
2344 vectorization for references in the inner-loop. */
2345 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2347 /* For the rest of the analysis we use the outer-loop step. */
2348 step = STMT_VINFO_DR_STEP (stmt_info);
2349 dr_step = TREE_INT_CST_LOW (step);
2353 if (vect_print_dump_info (REPORT_ALIGNMENT))
2354 fprintf (vect_dump, "zero step in outer loop.");
2355 if (DR_IS_READ (dr))
2363 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2365 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2367 /* Mark that it is not interleaving. */
2368 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
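/* E.g. (an illustration): a char access with DR_STEP 1, or an int
   access with DR_STEP -4, satisfies the test above and is treated as
   a simple consecutive (possibly reversed) access.  */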
2372 if (loop && nested_in_vect_loop_p (loop, stmt))
2374 if (vect_print_dump_info (REPORT_ALIGNMENT))
2375 fprintf (vect_dump, "strided access in outer loop.");
2379 /* Not a consecutive access - check if it is part of an interleaving group. */
2380 return vect_analyze_group_access (dr);
2384 /* Function vect_analyze_data_ref_accesses.
2386 Analyze the access pattern of all the data references in the loop.
2388 FORNOW: the only access pattern that is considered vectorizable is a
2389 simple step 1 (consecutive) access.
2391 FORNOW: handle only arrays and pointer accesses. */
2394 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2397 VEC (data_reference_p, heap) *datarefs;
2398 struct data_reference *dr;
2400 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2404 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2406 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2408 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2409 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2410 && !vect_analyze_data_ref_access (dr))
2412 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2413 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2417 /* Mark the statement as not vectorizable. */
2418 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2428 /* Function vect_prune_runtime_alias_test_list.
2430 Prune a list of ddrs to be tested at run-time by versioning for alias.
2431 Return FALSE if the resulting list of ddrs is longer than allowed by
2432 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
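/* For example (an illustration): if two ddrs would version on the same
   pair of address ranges - say both reduce to testing &a[0]..&a[n]
   against &b[0]..&b[n] - the second runtime check is redundant and is
   removed below once vect_vfa_range_equal detects the equal ranges.  */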
2435 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2437 VEC (ddr_p, heap) * ddrs =
2438 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2441 if (vect_print_dump_info (REPORT_DETAILS))
2442 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2444 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2449 ddr_i = VEC_index (ddr_p, ddrs, i);
2452 for (j = 0; j < i; j++)
2454 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2456 if (vect_vfa_range_equal (ddr_i, ddr_j))
2458 if (vect_print_dump_info (REPORT_DR_DETAILS))
2460 fprintf (vect_dump, "found equal ranges ");
2461 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2462 fprintf (vect_dump, ", ");
2463 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2464 fprintf (vect_dump, " and ");
2465 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2466 fprintf (vect_dump, ", ");
2467 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2476 VEC_ordered_remove (ddr_p, ddrs, i);
2482 if (VEC_length (ddr_p, ddrs) >
2483 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2485 if (vect_print_dump_info (REPORT_DR_DETAILS))
2488 "disable versioning for alias - max number of generated "
2489 "checks exceeded.");
2492 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2501 /* Function vect_analyze_data_refs.
2503 Find all the data references in the loop or basic block.
2505 The general structure of the analysis of data refs in the vectorizer is as
2506 follows:
2507 1- vect_analyze_data_refs(loop/bb): call
2508 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2509 in the loop/bb and their dependences.
2510 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2511 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2512 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2517 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2518 bb_vec_info bb_vinfo,
2521 struct loop *loop = NULL;
2522 basic_block bb = NULL;
2524 VEC (data_reference_p, heap) *datarefs;
2525 struct data_reference *dr;
2529 if (vect_print_dump_info (REPORT_DETAILS))
2530 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2534 loop = LOOP_VINFO_LOOP (loop_vinfo);
2535 res = compute_data_dependences_for_loop
2537 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2538 &LOOP_VINFO_DATAREFS (loop_vinfo),
2539 &LOOP_VINFO_DDRS (loop_vinfo));
2543 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2544 fprintf (vect_dump, "not vectorized: loop contains function calls"
2545 " or data references that cannot be analyzed");
2549 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2553 bb = BB_VINFO_BB (bb_vinfo);
2554 res = compute_data_dependences_for_bb (bb, true,
2555 &BB_VINFO_DATAREFS (bb_vinfo),
2556 &BB_VINFO_DDRS (bb_vinfo));
2559 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2560 fprintf (vect_dump, "not vectorized: basic block contains function"
2561 " calls or data references that cannot be analyzed");
2565 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2568 /* Go through the data-refs, checking that the analysis succeeded. Update
2569 pointer from stmt_vec_info struct to DR and vectype. */
2571 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2574 stmt_vec_info stmt_info;
2575 tree base, offset, init;
2578 if (!dr || !DR_REF (dr))
2580 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2581 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2585 stmt = DR_STMT (dr);
2586 stmt_info = vinfo_for_stmt (stmt);
2588 /* Check that analysis of the data-ref succeeded. */
2589 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2592 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2594 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2595 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2601 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2603 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2604 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2609 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2611 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2613 fprintf (vect_dump, "not vectorized: volatile type ");
2614 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2619 base = unshare_expr (DR_BASE_ADDRESS (dr));
2620 offset = unshare_expr (DR_OFFSET (dr));
2621 init = unshare_expr (DR_INIT (dr));
2623 if (stmt_can_throw_internal (stmt))
2625 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2627 fprintf (vect_dump, "not vectorized: statement can throw an "
2629 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2634 if (is_gimple_call (stmt))
2636 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2638 fprintf (vect_dump, "not vectorized: dr in a call ");
2639 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2644 /* Update DR field in stmt_vec_info struct. */
2646 /* If the dataref is in an inner-loop of the loop that is considered
2647 for vectorization, we also want to analyze the access relative to
2648 the outer-loop (DR contains information only relative to the
2649 inner-most enclosing loop). We do that by building a reference to the
2650 first location accessed by the inner-loop, and analyze it relative to
2651 the outer-loop. */
2652 if (loop && nested_in_vect_loop_p (loop, stmt))
2654 tree outer_step, outer_base, outer_init;
2655 HOST_WIDE_INT pbitsize, pbitpos;
2657 enum machine_mode pmode;
2658 int punsignedp, pvolatilep;
2659 affine_iv base_iv, offset_iv;
2662 /* Build a reference to the first location accessed by the
2663 inner-loop: *(BASE+INIT). (The first location is actually
2664 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2665 tree inner_base = build_fold_indirect_ref
2666 (fold_build2 (POINTER_PLUS_EXPR,
2667 TREE_TYPE (base), base,
2668 fold_convert (sizetype, init)));
2670 if (vect_print_dump_info (REPORT_DETAILS))
2672 fprintf (vect_dump, "analyze in outer-loop: ");
2673 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2676 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2677 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2678 gcc_assert (outer_base != NULL_TREE);
2680 if (pbitpos % BITS_PER_UNIT != 0)
2682 if (vect_print_dump_info (REPORT_DETAILS))
2683 fprintf (vect_dump, "failed: bit offset alignment.\n");
2687 outer_base = build_fold_addr_expr (outer_base);
2688 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2691 if (vect_print_dump_info (REPORT_DETAILS))
2692 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2699 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2707 offset_iv.base = ssize_int (0);
2708 offset_iv.step = ssize_int (0);
2710 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2713 if (vect_print_dump_info (REPORT_DETAILS))
2714 fprintf (vect_dump, "evolution of offset is not affine.\n");
2718 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2719 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2720 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2721 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2722 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2724 outer_step = size_binop (PLUS_EXPR,
2725 fold_convert (ssizetype, base_iv.step),
2726 fold_convert (ssizetype, offset_iv.step));
2728 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2729 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2730 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2731 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2732 STMT_VINFO_DR_OFFSET (stmt_info) =
2733 fold_convert (ssizetype, offset_iv.base);
2734 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2735 size_int (highest_pow2_factor (offset_iv.base));
2737 if (vect_print_dump_info (REPORT_DETAILS))
2739 fprintf (vect_dump, "\touter base_address: ");
2740 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2741 fprintf (vect_dump, "\n\touter offset from base address: ");
2742 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2743 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2744 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2745 fprintf (vect_dump, "\n\touter step: ");
2746 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2747 fprintf (vect_dump, "\n\touter aligned to: ");
2748 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2752 if (STMT_VINFO_DATA_REF (stmt_info))
2754 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2757 "not vectorized: more than one data ref in stmt: ");
2758 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2763 STMT_VINFO_DATA_REF (stmt_info) = dr;
2765 /* Set vectype for STMT. */
2766 scalar_type = TREE_TYPE (DR_REF (dr));
2767 STMT_VINFO_VECTYPE (stmt_info) =
2768 get_vectype_for_scalar_type (scalar_type);
2769 if (!STMT_VINFO_VECTYPE (stmt_info))
2771 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2774 "not vectorized: no vectype for stmt: ");
2775 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2776 fprintf (vect_dump, " scalar_type: ");
2777 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2782 /* Mark the statement as not vectorizable. */
2783 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2790 /* Adjust the minimal vectorization factor according to the
2791 vector type. */
2792 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2801 /* Function vect_get_new_vect_var.
2803 Returns a name for a new variable. The current naming scheme appends the
2804 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2805 the name of vectorizer generated variables, and appends that to NAME if
2806 provided. */
2809 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2816 case vect_simple_var:
2819 case vect_scalar_var:
2822 case vect_pointer_var:
2831 char* tmp = concat (prefix, name, NULL);
2832 new_vect_var = create_tmp_var (type, tmp);
2836 new_vect_var = create_tmp_var (type, prefix);
2838 /* Mark vector typed variable as a gimple register variable. */
2839 if (TREE_CODE (type) == VECTOR_TYPE)
2840 DECL_GIMPLE_REG_P (new_vect_var) = true;
2842 return new_vect_var;
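/* Usage illustration (a sketch): with NAME "in" and vect_pointer_var,
   the concat above yields a variable named "vect_pin"; with
   vect_simple_var it would be "vect_in".  create_tmp_var then makes
   the name unique (e.g. "vect_pin.7" in dumps).  */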
2846 /* Function vect_create_addr_base_for_vector_ref.
2848 Create an expression that computes the address of the first memory location
2849 that will be accessed for a data reference.
2852 STMT: The statement containing the data reference.
2853 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2854 OFFSET: Optional. If supplied, it is added to the initial address.
2855 LOOP: Specify the loop-nest relative to which the address should be computed.
2856 For example, when the dataref is in an inner-loop nested in an
2857 outer-loop that is now being vectorized, LOOP can be either the
2858 outer-loop, or the inner-loop. The first memory location accessed
2859 by the following dataref ('in' points to short):
2866 if LOOP=i_loop: &in (relative to i_loop)
2867 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2870 1. Return an SSA_NAME whose value is the address of the memory location of
2871 the first vector of the data reference.
2872 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2873 these statement(s) which define the returned SSA_NAME.
2875 FORNOW: We are only handling array accesses with step 1. */
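/* Generated-code sketch (an illustration, assuming a dataref a[i] of
   int with zero DR_OFFSET/DR_INIT and a caller-supplied OFFSET):

     base_off_1 = 0 + 0;                     DR_OFFSET + DR_INIT
     off_2 = OFFSET * 4;                     OFFSET scaled by the type size
     addr_3 = &a + (base_off_1 + off_2);     the returned SSA_NAME

   all appended to NEW_STMT_LIST; the temporary names are hypothetical.  */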
2878 vect_create_addr_base_for_vector_ref (gimple stmt,
2879 gimple_seq *new_stmt_list,
2883 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2884 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2885 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2887 tree data_ref_base_var;
2889 tree addr_base, addr_expr;
2891 gimple_seq seq = NULL;
2892 tree base_offset = unshare_expr (DR_OFFSET (dr));
2893 tree init = unshare_expr (DR_INIT (dr));
2895 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2899 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2901 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2903 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2905 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2906 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2907 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2911 base_name = build_fold_indirect_ref (data_ref_base);
2914 base_offset = ssize_int (0);
2915 init = ssize_int (0);
2916 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2919 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2920 add_referenced_var (data_ref_base_var);
2921 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2923 gimple_seq_add_seq (new_stmt_list, seq);
2925 /* Create base_offset */
2926 base_offset = size_binop (PLUS_EXPR,
2927 fold_convert (sizetype, base_offset),
2928 fold_convert (sizetype, init));
2929 dest = create_tmp_var (sizetype, "base_off");
2930 add_referenced_var (dest);
2931 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2932 gimple_seq_add_seq (new_stmt_list, seq);
2936 tree tmp = create_tmp_var (sizetype, "offset");
2938 add_referenced_var (tmp);
2939 offset = fold_build2 (MULT_EXPR, sizetype,
2940 fold_convert (sizetype, offset), step);
2941 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2942 base_offset, offset);
2943 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2944 gimple_seq_add_seq (new_stmt_list, seq);
2947 /* base + base_offset */
2949 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2950 data_ref_base, base_offset);
2953 addr_base = build1 (ADDR_EXPR,
2954 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2955 unshare_expr (DR_REF (dr)));
2958 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2959 base = get_base_address (DR_REF (dr));
2961 && TREE_CODE (base) == MEM_REF)
2963 = build_qualified_type (vect_ptr_type,
2964 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2966 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2967 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2968 get_name (base_name));
2969 add_referenced_var (addr_expr);
2970 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2971 gimple_seq_add_seq (new_stmt_list, seq);
2973 if (DR_PTR_INFO (dr)
2974 && TREE_CODE (vec_stmt) == SSA_NAME)
2976 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2979 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2980 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2984 if (vect_print_dump_info (REPORT_DETAILS))
2986 fprintf (vect_dump, "created ");
2987 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2994 /* Function vect_create_data_ref_ptr.
2996 Create a new pointer to vector type (vp), that points to the first location
2997 accessed in the loop by STMT, along with the def-use update chain to
2998 appropriately advance the pointer through the loop iterations. Also set
2999 aliasing information for the pointer. This vector pointer is used by the
3000 callers to this function to create a memory reference expression for vector
3004 1. STMT: a stmt that references memory. Expected to be of the form
3005 GIMPLE_ASSIGN <name, data-ref> or
3006 GIMPLE_ASSIGN <data-ref, name>.
3007 2. AT_LOOP: the loop where the vector memref is to be created.
3008 3. OFFSET (optional): an offset to be added to the initial address accessed
3009 by the data-ref in STMT.
3010 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
3011 pointing to the initial address.
3012 5. TYPE: if not NULL indicates the required type of the data-ref.
3015 1. Declare a new ptr to vector_type, and have it point to the base of the
3016 data reference (initial address accessed by the data reference).
3017 For example, for vector of type V8HI, the following code is generated:
3020 vp = (v8hi *)initial_address;
3022 if OFFSET is not supplied:
3023 initial_address = &a[init];
3024 if OFFSET is supplied:
3025 initial_address = &a[init + OFFSET];
3027 Return the initial_address in INITIAL_ADDRESS.
3029 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3030 update the pointer in each iteration of the loop.
3032 Return the increment stmt that updates the pointer in PTR_INCR.
3034 3. Set INV_P to true if the access pattern of the data reference in the
3035 vectorized loop is invariant. Set it to false otherwise.
3037 4. Return the pointer. */
3040 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
3041 tree offset, tree *initial_address, gimple *ptr_incr,
3042 bool only_init, bool *inv_p)
3045 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3046 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3047 struct loop *loop = NULL;
3048 bool nested_in_vect_loop = false;
3049 struct loop *containing_loop = NULL;
3050 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3055 gimple_seq new_stmt_list = NULL;
3059 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3061 gimple_stmt_iterator incr_gsi;
3064 tree indx_before_incr, indx_after_incr;
3067 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3068 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3073 loop = LOOP_VINFO_LOOP (loop_vinfo);
3074 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3075 containing_loop = (gimple_bb (stmt))->loop_father;
3076 pe = loop_preheader_edge (loop);
3080 gcc_assert (bb_vinfo);
3085 /* Check the step (evolution) of the load in LOOP, and record
3086 whether it's invariant. */
3087 if (nested_in_vect_loop)
3088 step = STMT_VINFO_DR_STEP (stmt_info);
3090 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3092 if (tree_int_cst_compare (step, size_zero_node) == 0)
3096 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3098 /* Create an expression for the first address accessed by this load
3099 in LOOP. */
3100 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3102 if (vect_print_dump_info (REPORT_DETAILS))
3104 tree data_ref_base = base_name;
3105 fprintf (vect_dump, "create vector-pointer variable to type: ");
3106 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3107 if (TREE_CODE (data_ref_base) == VAR_DECL
3108 || TREE_CODE (data_ref_base) == ARRAY_REF)
3109 fprintf (vect_dump, " vectorizing an array ref: ");
3110 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3111 fprintf (vect_dump, " vectorizing a record based array ref: ");
3112 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3113 fprintf (vect_dump, " vectorizing a pointer ref: ");
3114 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3117 /* (1) Create the new vector-pointer variable. */
3118 vect_ptr_type = build_pointer_type (vectype);
3119 base = get_base_address (DR_REF (dr));
3121 && TREE_CODE (base) == MEM_REF)
3123 = build_qualified_type (vect_ptr_type,
3124 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3125 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3126 get_name (base_name));
3128 /* Vector types inherit the alias set of their component type by default,
3129 so we need to use a ref-all pointer if the data reference does not conflict
3130 with the created vector data reference, because it is not addressable. */
3131 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3132 get_alias_set (DR_REF (dr))))
3135 = build_pointer_type_for_mode (vectype,
3136 TYPE_MODE (vect_ptr_type), true);
3137 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3138 get_name (base_name));
3141 /* Likewise for any of the data references in the stmt group. */
3142 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3144 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3147 tree lhs = gimple_assign_lhs (orig_stmt);
3148 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3149 get_alias_set (lhs)))
3152 = build_pointer_type_for_mode (vectype,
3153 TYPE_MODE (vect_ptr_type), true);
3155 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3156 get_name (base_name));
3160 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3165 add_referenced_var (vect_ptr);
3167 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3168 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3169 def-use update cycles for the pointer: one relative to the outer-loop
3170 (LOOP), which is what steps (3) and (4) below do. The other is relative
3171 to the inner-loop (which is the inner-most loop containing the dataref),
3172 and this is done by step (5) below.
3174 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3175 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3176 redundant. Steps (3),(4) create the following:
3179 LOOP: vp1 = phi(vp0,vp2)
3185 If there is an inner-loop nested in loop, then step (5) will also be
3186 applied, and an additional update in the inner-loop will be created:
3189 LOOP: vp1 = phi(vp0,vp2)
3191 inner: vp3 = phi(vp1,vp4)
3192 vp4 = vp3 + inner_step
3198 /* (2) Calculate the initial address of the vector-pointer, and set
3199 the vector-pointer to point to it before the loop. */
3201 /* Create: &(base[init_val+offset]) in the loop preheader. */
3203 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3209 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3210 gcc_assert (!new_bb);
3213 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3216 *initial_address = new_temp;
3218 /* Create: p = (vectype *) initial_base */
3219 if (TREE_CODE (new_temp) != SSA_NAME
3220 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3222 vec_stmt = gimple_build_assign (vect_ptr,
3223 fold_convert (vect_ptr_type, new_temp));
3224 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3225 /* Copy the points-to information if it exists. */
3226 if (DR_PTR_INFO (dr))
3227 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3228 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3231 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3232 gcc_assert (!new_bb);
3235 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3238 vect_ptr_init = new_temp;
3240 /* (3) Handle the updating of the vector-pointer inside the loop.
3241 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3242 inner-loop nested in LOOP (during outer-loop vectorization). */
3244 /* No update in loop is required. */
3245 if (only_init && (!loop_vinfo || at_loop == loop))
3246 vptr = vect_ptr_init;
3249 /* The step of the vector pointer is the Vector Size. */
3250 tree step = TYPE_SIZE_UNIT (vectype);
3251 /* One exception to the above is when the scalar step of the load in
3252 LOOP is zero. In this case the step here is also zero. */
3254 step = size_zero_node;
3256 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3258 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3260 create_iv (vect_ptr_init,
3261 fold_convert (vect_ptr_type, step),
3262 vect_ptr, loop, &incr_gsi, insert_after,
3263 &indx_before_incr, &indx_after_incr);
3264 incr = gsi_stmt (incr_gsi);
3265 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3267 /* Copy the points-to information if it exists. */
3268 if (DR_PTR_INFO (dr))
3270 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3271 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3276 vptr = indx_before_incr;
3279 if (!nested_in_vect_loop || only_init)
3283 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3284 nested in LOOP, if it exists. */
3286 gcc_assert (nested_in_vect_loop);
3289 standard_iv_increment_position (containing_loop, &incr_gsi,
3291 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3292 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3294 incr = gsi_stmt (incr_gsi);
3295 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3297 /* Copy the points-to information if it exists. */
3298 if (DR_PTR_INFO (dr))
3300 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3301 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3306 return indx_before_incr;
3313 /* Function bump_vector_ptr
3315 Increment a pointer (to a vector type) by vector-size. If requested,
3316 i.e. if PTR_INCR is given, then also connect the new increment stmt
3317 to the existing def-use update-chain of the pointer, by modifying
3318 the PTR_INCR as illustrated below:
3320 The pointer def-use update-chain before this function:
3321 DATAREF_PTR = phi (p_0, p_2)
3323 PTR_INCR: p_2 = DATAREF_PTR + step
3325 The pointer def-use update-chain after this function:
3326 DATAREF_PTR = phi (p_0, p_2)
3328 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3330 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3333 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3335 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3336 the loop. The increment amount across iterations is expected
3337 to be vector_size.
3338 BSI - location where the new update stmt is to be placed.
3339 STMT - the original scalar memory-access stmt that is being vectorized.
3340 BUMP - optional. The offset by which to bump the pointer. If not given,
3341 the offset is assumed to be vector_size.
3343 Output: Return NEW_DATAREF_PTR as illustrated above.
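
   Usage sketch (an illustration): a caller that needs the next vector
   of a group typically does

     ptr2 = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt, NULL_TREE);

   so that ptr2 = DATAREF_PTR + vector_size, with PTR_INCR rewired as
   shown above; PTR2 is a hypothetical name.  */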
3348 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3349 gimple stmt, tree bump)
3351 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3352 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3353 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3354 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3355 tree update = TYPE_SIZE_UNIT (vectype);
3358 use_operand_p use_p;
3359 tree new_dataref_ptr;
3364 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3365 dataref_ptr, update);
3366 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3367 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3368 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3370 /* Copy the points-to information if it exists. */
3371 if (DR_PTR_INFO (dr))
3373 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3374 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3375 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3379 return new_dataref_ptr;
3381 /* Update the vector-pointer's cross-iteration increment. */
3382 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3384 tree use = USE_FROM_PTR (use_p);
3386 if (use == dataref_ptr)
3387 SET_USE (use_p, new_dataref_ptr);
3389 gcc_assert (tree_int_cst_compare (use, update) == 0);
3392 return new_dataref_ptr;
3396 /* Function vect_create_destination_var.
3398 Create a new temporary of type VECTYPE. */
3401 vect_create_destination_var (tree scalar_dest, tree vectype)
3404 const char *new_name;
3406 enum vect_var_kind kind;
3408 kind = vectype ? vect_simple_var : vect_scalar_var;
3409 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3411 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3413 new_name = get_name (scalar_dest);
3416 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3417 add_referenced_var (vec_dest);
3422 /* Function vect_strided_store_supported.
3424 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3425 and FALSE otherwise. */
3428 vect_strided_store_supported (tree vectype)
3430 optab interleave_high_optab, interleave_low_optab;
3431 enum machine_mode mode;
3433 mode = TYPE_MODE (vectype);
3435 /* Check that the operation is supported. */
3436 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3437 vectype, optab_default);
3438 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3439 vectype, optab_default);
3440 if (!interleave_high_optab || !interleave_low_optab)
3442 if (vect_print_dump_info (REPORT_DETAILS))
3443 fprintf (vect_dump, "no optab for interleave.");
3447 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3448 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3450 if (vect_print_dump_info (REPORT_DETAILS))
3451 fprintf (vect_dump, "interleave op not supported by target.");
3459 /* Function vect_permute_store_chain.
3461 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3462 a power of 2, generate interleave_high/low stmts to reorder the data
3463 correctly for the stores. Return the final references for stores in
3464 RESULT_CHAIN.
3466 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3467 The input is 4 vectors each containing 8 elements. We assign a number to
3468 each element; the input sequence is:
3470 1st vec: 0 1 2 3 4 5 6 7
3471 2nd vec: 8 9 10 11 12 13 14 15
3472 3rd vec: 16 17 18 19 20 21 22 23
3473 4th vec: 24 25 26 27 28 29 30 31
3475 The output sequence should be:
3477 1st vec: 0 8 16 24 1 9 17 25
3478 2nd vec: 2 10 18 26 3 11 19 27
3479 3rd vec: 4 12 20 28 5 13 21 29
3480 4th vec: 6 14 22 30 7 15 23 31
3482 i.e., we interleave the contents of the four vectors in their order.
3484 We use interleave_high/low instructions to create such output. The input of
3485 each interleave_high/low operation is two vectors:
3488 the even elements of the result vector are obtained left-to-right from the
3489 high/low elements of the first vector. The odd elements of the result are
3490 obtained left-to-right from the high/low elements of the second vector.
3491 The output of interleave_high will be: 0 4 1 5
3492 and of interleave_low: 2 6 3 7
3495 The permutation is done in log LENGTH stages. In each stage interleave_high
3496 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3497 where the first argument is taken from the first half of DR_CHAIN and the
3498 second argument from its second half.
3501 I1: interleave_high (1st vec, 3rd vec)
3502 I2: interleave_low (1st vec, 3rd vec)
3503 I3: interleave_high (2nd vec, 4th vec)
3504 I4: interleave_low (2nd vec, 4th vec)
3506 The output for the first stage is:
3508 I1: 0 16 1 17 2 18 3 19
3509 I2: 4 20 5 21 6 22 7 23
3510 I3: 8 24 9 25 10 26 11 27
3511 I4: 12 28 13 29 14 30 15 31
3513 The output of the second stage, i.e. the final result is:
3515 I1: 0 8 16 24 1 9 17 25
3516 I2: 2 10 18 26 3 11 19 27
3517 I3: 4 12 20 28 5 13 21 29
3518 I4: 6 14 22 30 7 15 23 31. */
3521 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3522 unsigned int length,
3524 gimple_stmt_iterator *gsi,
3525 VEC(tree,heap) **result_chain)
3527 tree perm_dest, vect1, vect2, high, low;
3529 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3532 enum tree_code high_code, low_code;
3534 /* Check that the operation is supported. */
3535 if (!vect_strided_store_supported (vectype))
3538 *result_chain = VEC_copy (tree, heap, dr_chain);
3540 for (i = 0; i < exact_log2 (length); i++)
3542 for (j = 0; j < length/2; j++)
3544 vect1 = VEC_index (tree, dr_chain, j);
3545 vect2 = VEC_index (tree, dr_chain, j+length/2);
3547 /* Create interleaving stmt:
3548 in the case of big endian:
3549 high = interleave_high (vect1, vect2)
3550 and in the case of little endian:
3551 high = interleave_low (vect1, vect2). */
3552 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3553 DECL_GIMPLE_REG_P (perm_dest) = 1;
3554 add_referenced_var (perm_dest);
3555 if (BYTES_BIG_ENDIAN)
3557 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3558 low_code = VEC_INTERLEAVE_LOW_EXPR;
3562 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3563 high_code = VEC_INTERLEAVE_LOW_EXPR;
3565 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3567 high = make_ssa_name (perm_dest, perm_stmt);
3568 gimple_assign_set_lhs (perm_stmt, high);
3569 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3570 VEC_replace (tree, *result_chain, 2*j, high);
3572 /* Create interleaving stmt:
3573 in the case of big endian:
3574 low = interleave_low (vect1, vect2)
3575 and in the case of little endian:
3576 low = interleave_high (vect1, vect2). */
3577 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3578 DECL_GIMPLE_REG_P (perm_dest) = 1;
3579 add_referenced_var (perm_dest);
3580 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3582 low = make_ssa_name (perm_dest, perm_stmt);
3583 gimple_assign_set_lhs (perm_stmt, low);
3584 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3585 VEC_replace (tree, *result_chain, 2*j+1, low);
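/* Make this stage's output the next stage's input; the copy below
   leaves *RESULT_CHAIN intact for the caller.  */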
3587 dr_chain = VEC_copy (tree, heap, *result_chain);
3592 /* Function vect_setup_realignment
3594 This function is called when vectorizing an unaligned load using
3595 the dr_explicit_realign[_optimized] scheme.
3596 This function generates the following code at the loop prolog:
3599 x msq_init = *(floor(p)); # prolog load
3600 realignment_token = call target_builtin;
3602 x msq = phi (msq_init, ---)
3604 The stmts marked with x are generated only for the case of
3605 dr_explicit_realign_optimized.
3607 The code above sets up a new (vector) pointer, pointing to the first
3608 location accessed by STMT, and a "floor-aligned" load using that pointer.
3609 It also generates code to compute the "realignment-token" (if the relevant
3610 target hook was defined), and creates a phi-node at the loop-header bb
3611 whose arguments are the result of the prolog-load (created by this
3612 function) and the result of a load that takes place in the loop (to be
3613 created by the caller to this function).
3615 For the case of dr_explicit_realign_optimized:
3616 The caller to this function uses the phi-result (msq) to create the
3617 realignment code inside the loop, and sets up the missing phi argument,
3620 msq = phi (msq_init, lsq)
3621 lsq = *(floor(p')); # load in loop
3622 result = realign_load (msq, lsq, realignment_token);
3624 For the case of dr_explicit_realign:
3626 msq = *(floor(p)); # load in loop
3628 lsq = *(floor(p')); # load in loop
3629 result = realign_load (msq, lsq, realignment_token);
3632 STMT - (scalar) load stmt to be vectorized. This load accesses
3633 a memory location that may be unaligned.
3634 BSI - place where new code is to be inserted.
3635 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3639 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3640 target hook, if defined.
3641 Return value - the result of the loop-header phi node. */
3644 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3645 tree *realignment_token,
3646 enum dr_alignment_support alignment_support_scheme,
3648 struct loop **at_loop)
3650 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3651 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3652 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3653 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3654 struct loop *loop = NULL;
3656 tree scalar_dest = gimple_assign_lhs (stmt);
3663 tree msq_init = NULL_TREE;
3666 tree msq = NULL_TREE;
3667 gimple_seq stmts = NULL;
3669 bool compute_in_loop = false;
3670 bool nested_in_vect_loop = false;
3671 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3672 struct loop *loop_for_initial_load = NULL;
3676 loop = LOOP_VINFO_LOOP (loop_vinfo);
3677 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3680 gcc_assert (alignment_support_scheme == dr_explicit_realign
3681 || alignment_support_scheme == dr_explicit_realign_optimized);
3683 /* We need to generate three things:
3684 1. the misalignment computation
3685 2. the extra vector load (for the optimized realignment scheme).
3686 3. the phi node for the two vectors from which the realignment is
3687 done (for the optimized realignment scheme). */
3689 /* 1. Determine where to generate the misalignment computation.
3691 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3692 calculation will be generated by this function, outside the loop (in the
3693 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3694 caller, inside the loop.
3696 Background: If the misalignment remains fixed throughout the iterations of
3697 the loop, then both realignment schemes are applicable, and also the
3698 misalignment computation can be done outside LOOP. This is because we are
3699 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3700 are a multiple of VS (the Vector Size), and therefore the misalignment in
3701 different vectorized LOOP iterations is always the same.
3702 The problem arises only if the memory access is in an inner-loop nested
3703 inside LOOP, which is now being vectorized using outer-loop vectorization.
3704 This is the only case when the misalignment of the memory access may not
3705 remain fixed throughout the iterations of the inner-loop (as explained in
3706 detail in vect_supportable_dr_alignment). In this case, not only is the
3707 optimized realignment scheme not applicable, but also the misalignment
3708 computation (and generation of the realignment token that is passed to
3709 REALIGN_LOAD) has to be done inside the loop.
3711 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3712 or not, which in turn determines if the misalignment is computed inside
3713 the inner-loop, or outside LOOP. */
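/* Worked example (an illustration): with a 16-byte vector size, a
   dataref whose address advances by 16 bytes per vectorized iteration
   keeps addr % 16 constant, so the realignment token can be computed
   once in the preheader.  If the access instead lives in an inner-loop
   that advances the address by, say, 20 bytes per outer iteration,
   addr % 16 changes across iterations and the computation must happen
   inside LOOP.  */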
3715 if (init_addr != NULL_TREE || !loop_vinfo)
3717 compute_in_loop = true;
3718 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3722 /* 2. Determine where to generate the extra vector load.
3724 For the optimized realignment scheme, instead of generating two vector
3725 loads in each iteration, we generate a single extra vector load in the
3726 preheader of the loop, and in each iteration reuse the result of the
3727 vector load from the previous iteration. In case the memory access is in
3728 an inner-loop nested inside LOOP, which is now being vectorized using
3729 outer-loop vectorization, we need to determine whether this initial vector
3730 load should be generated at the preheader of the inner-loop, or can be
3731 generated at the preheader of LOOP. If the memory access has no evolution
3732 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3733 to be generated inside LOOP (in the preheader of the inner-loop). */
3735 if (nested_in_vect_loop)
3737 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3738 bool invariant_in_outerloop =
3739 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3740 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3743 loop_for_initial_load = loop;
3745 *at_loop = loop_for_initial_load;
3747 if (loop_for_initial_load)
3748 pe = loop_preheader_edge (loop_for_initial_load);
3750 /* 3. For the case of the optimized realignment, create the first vector
3751 load at the loop preheader. */
3753 if (alignment_support_scheme == dr_explicit_realign_optimized)
3755 /* Create msq_init = *(floor(p1)) in the loop preheader */
3757 gcc_assert (!compute_in_loop);
3758 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3759 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3760 &init_addr, &inc, true, &inv_p);
3761 new_stmt = gimple_build_assign_with_ops
3762 (BIT_AND_EXPR, NULL_TREE, ptr,
3763 build_int_cst (TREE_TYPE (ptr),
3764 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3765 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3766 gimple_assign_set_lhs (new_stmt, new_temp);
3767 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3768 gcc_assert (!new_bb);
3770 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3771 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3772 new_stmt = gimple_build_assign (vec_dest, data_ref);
3773 new_temp = make_ssa_name (vec_dest, new_stmt);
3774 gimple_assign_set_lhs (new_stmt, new_temp);
3775 mark_symbols_for_renaming (new_stmt);
3778 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3779 gcc_assert (!new_bb);
3782 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3784 msq_init = gimple_assign_lhs (new_stmt);
3787 /* 4. Create realignment token using a target builtin, if available.
3788 It is done either inside the containing loop, or before LOOP (as
3789 determined above). */
3791 if (targetm.vectorize.builtin_mask_for_load)
3795 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3798 /* Generate the INIT_ADDR computation outside LOOP. */
3799 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3803 pe = loop_preheader_edge (loop);
3804 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3805 gcc_assert (!new_bb);
3808 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3811 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3812 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3814 vect_create_destination_var (scalar_dest,
3815 gimple_call_return_type (new_stmt));
3816 new_temp = make_ssa_name (vec_dest, new_stmt);
3817 gimple_call_set_lhs (new_stmt, new_temp);
3819 if (compute_in_loop)
3820 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3823 /* Generate the misalignment computation outside LOOP. */
3824 pe = loop_preheader_edge (loop);
3825 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3826 gcc_assert (!new_bb);
3829 *realignment_token = gimple_call_lhs (new_stmt);
3831 /* The result of the CALL_EXPR to this builtin is determined from
3832 the value of the parameter, and no global variables are touched,
3833 which makes the builtin a "const" function. Requiring the
3834 builtin to have the "const" attribute makes it unnecessary
3835 to call mark_call_clobbered. */
3836 gcc_assert (TREE_READONLY (builtin_decl));
3839 if (alignment_support_scheme == dr_explicit_realign)
3842 gcc_assert (!compute_in_loop);
3843 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3846 /* 5. Create msq = phi <msq_init, lsq> in loop */
3848 pe = loop_preheader_edge (containing_loop);
3849 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3850 msq = make_ssa_name (vec_dest, NULL);
3851 phi_stmt = create_phi_node (msq, containing_loop->header);
3852 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3853 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3859 /* Function vect_strided_load_supported.
3861 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3862 and FALSE otherwise. */
3865 vect_strided_load_supported (tree vectype)
3867 optab perm_even_optab, perm_odd_optab;
3868 enum machine_mode mode;
3870 mode = TYPE_MODE (vectype);
3872 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3874 if (!perm_even_optab)
3876 if (vect_print_dump_info (REPORT_DETAILS))
3877 fprintf (vect_dump, "no optab for perm_even.");
3881 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3883 if (vect_print_dump_info (REPORT_DETAILS))
3884 fprintf (vect_dump, "perm_even op not supported by target.");
3888 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3890 if (!perm_odd_optab)
3892 if (vect_print_dump_info (REPORT_DETAILS))
3893 fprintf (vect_dump, "no optab for perm_odd.");
3897 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3899 if (vect_print_dump_info (REPORT_DETAILS))
3900 fprintf (vect_dump, "perm_odd op not supported by target.");


/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number
   to each element; the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:   0  4  8 12 16 20 24 28
   2nd vec:   1  5  9 13 17 21 25 29
   3rd vec:   2  6 10 14 18 22 26 30
   4th vec:   3  7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors:

   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output
   of extract_even will be:  0 2 4 6
   and of extract_odd:       1 3 5 7

   The permutation is done in log2 (LENGTH) stages.  In each stage
   extract_even and extract_odd stmts are created for each pair of vectors
   in DR_CHAIN in their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd  (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd  (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:   0  2  4  6  8 10 12 14
   E2:   1  3  5  7  9 11 13 15
   E3:  16 18 20 22 24 26 28 30
   E4:  17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of extract_even operation and then the
   output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
   The input for the second stage is:

   1st vec (E1):   0  2  4  6  8 10 12 14
   2nd vec (E3):  16 18 20 22 24 26 28 30
   3rd vec (E2):   1  3  5  7  9 11 13 15
   4th vec (E4):  17 19 21 23 25 27 29 31

   The output of the second stage:

   E1:   0  4  8 12 16 20 24 28
   E2:   2  6 10 14 18 22 26 30
   E3:   1  5  9 13 17 21 25 29
   E4:   3  7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1):   0  4  8 12 16 20 24 28
   2nd vec (E3):   1  5  9 13 17 21 25 29
   3rd vec (E2):   2  6 10 14 18 22 26 30
   4th vec (E4):   3  7 11 15 19 23 27 31.  */

static bool
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
                         unsigned int length,
                         gimple stmt,
                         gimple_stmt_iterator *gsi,
                         VEC(tree,heap) **result_chain)
{
  tree perm_dest, data_ref, first_vect, second_vect;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  int i;
  unsigned int j;

  /* Check that the operation is supported.  */
  if (!vect_strided_load_supported (vectype))
    return false;

  *result_chain = VEC_copy (tree, heap, dr_chain);
  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = VEC_index (tree, dr_chain, j);
          second_vect = VEC_index (tree, dr_chain, j+1);

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);

          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2, data_ref);

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
  return true;
}
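
/* The following is an illustrative, self-contained model of the reordering
   performed by vect_permute_load_chain, written against plain arrays rather
   than GIMPLE vector trees.  It is a sketch only and not part of GCC: the
   names model_extract_even/odd and model_permute_load_chain are hypothetical,
   NELT stands for the number of elements per vector, and the block is kept
   under "#if 0" so it is never compiled.  */
#if 0
#include <string.h>

#define NELT 8  /* Elements per vector, as in the example above (VF = 8).  */

/* extract_even: even-indexed elements of A followed by those of B.  */
static void
model_extract_even (const int *a, const int *b, int *out)
{
  int k;
  for (k = 0; k < NELT / 2; k++)
    {
      out[k] = a[2 * k];
      out[NELT / 2 + k] = b[2 * k];
    }
}

/* extract_odd: odd-indexed elements of A followed by those of B.  */
static void
model_extract_odd (const int *a, const int *b, int *out)
{
  int k;
  for (k = 0; k < NELT / 2; k++)
    {
      out[k] = a[2 * k + 1];
      out[NELT / 2 + k] = b[2 * k + 1];
    }
}

/* Reorder CHAIN (LENGTH vectors, LENGTH a power of two) in log2 (LENGTH)
   stages; afterwards RESULT[i] holds the elements of interleaving group i,
   matching the worked example before vect_permute_load_chain.  Evens go to
   slot j/2 and odds to slot j/2 + length/2, exactly as in the VEC_replace
   calls above.  */
static void
model_permute_load_chain (int chain[][NELT], int result[][NELT],
                          unsigned int length)
{
  unsigned int i, j, stages = 0;

  while ((1U << stages) < length)
    stages++;

  for (i = 0; i < stages; i++)
    {
      for (j = 0; j < length; j += 2)
        {
          model_extract_even (chain[j], chain[j + 1], result[j / 2]);
          model_extract_odd (chain[j], chain[j + 1],
                             result[j / 2 + length / 2]);
        }
      memcpy (chain, result, length * NELT * sizeof (int));
    }
}
#endif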

/* Function vect_transform_strided_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build
   statements to perform their permutation and attach the resulting
   vectorized statements to the scalar statements.  */

bool
vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  gimple next_stmt, new_stmt;
  VEC(tree,heap) *result_chain = NULL;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains
     permuted vectors that are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);

  /* Permute.  */
  if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
    return false;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by the
         dead code elimination pass later.  No need to check for the first
         stmt in the group, since it always exists.
         DR_GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (next_stmt != first_stmt
          && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of
             multiple copies, and we put the new vector statement in the
             first available RELATED_STMT.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
                {
                  gimple prev_stmt =
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
                  gimple rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
                  while (rel_stmt)
                    {
                      prev_stmt = rel_stmt;
                      rel_stmt =
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                    }

                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
                    new_stmt;
                }
            }

          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }

  VEC_free (tree, heap, result_chain);
  return true;
}
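
/* An illustrative example of the gap handling above (a constructed case,
   not taken from a specific testcase): for an interleaving group of size 4
   where the scalar code reads a[4i], a[4i+1] and a[4i+3] but never a[4i+2],
   the access for a[4i+3] has DR_GROUP_GAP 2.  While walking RESULT_CHAIN,
   GAP_COUNT (which restarts at 1 after every real access) stays below that
   gap for exactly one permuted vector - the one corresponding to the unused
   a[4i+2] - so that vector is skipped, and the load feeding it is left for
   dead code elimination to remove.  */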


/* Function vect_can_force_dr_alignment_p.

   Return whether the alignment of a DECL can be forced to be aligned
   on an ALIGNMENT-bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  if (DECL_EXTERNAL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}
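
/* A minimal usage sketch (illustrative; this mirrors the way the
   vectorizer's alignment computation consumes the predicate, assuming a
   VAR_DECL BASE underlying a data-ref and a vector type VECTYPE):

     if (vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
       {
         DECL_ALIGN (base) = TYPE_ALIGN (vectype);   // force the alignment
         DECL_USER_ALIGN (base) = 1;                 // and make it stick
       }
*/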


/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
                               bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout
     the execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same
     throughout the execution of the inner-loop.  This is because the
     inner-loop advances with the original scalar step (and not in steps of
     VS).  If the inner-loop step happens to be a multiple of VS, then the
     misalignment remains fixed and we can use the optimized realignment
     scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      }  */

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if ((nested_in_vect_loop
               && (TREE_INT_CST_LOW (DR_STEP (dr))
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
              || !loop_vinfo)
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
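
/* A minimal usage sketch (illustrative; the load/store vectorization code
   dispatches on the returned scheme in essentially this way, though the
   exact code below is a paraphrase, not a quotation of it):

     enum dr_alignment_support alignment_support_scheme
       = vect_supportable_dr_alignment (dr, false);

     switch (alignment_support_scheme)
       {
       case dr_aligned:
       case dr_unaligned_supported:
         // Emit a plain (possibly misaligned) vector memory access.
         break;
       case dr_explicit_realign:
       case dr_explicit_realign_optimized:
         // Loads only: emit REALIGN_LOAD, with the realignment token and
         // the msq phi prepared by vect_setup_realignment above.
         break;
       default:
         // dr_unaligned_unsupported: this access cannot be vectorized.
         gcc_unreachable ();
       }
*/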