1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
46 /* Return the smallest scalar part of STMT.
47 This is used to determine the vectype of the stmt. We generally set the
48 vectype according to the type of the result (lhs). For stmts whose
49 result-type is different than the type of the arguments (e.g., demotion,
50 promotion), vectype will be reset appropriately (later). Note that we have
51 to visit the smallest datatype in this function, because that determines the
52 VF. If the smallest datatype in the loop is present only as the rhs of a
53 promotion operation - we'd miss it.
54 Such a case, where a variable of this datatype does not appear in the lhs
55 anywhere in the loop, can only occur if it's an invariant: e.g.:
56 'int_x = (int) short_inv', which we'd expect to have been optimized away by
57 invariant motion. However, we cannot rely on invariant motion to always
58 take invariants out of the loop, and so in the case of promotion we also
59 have to check the rhs.
60 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
64 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
65 HOST_WIDE_INT *rhs_size_unit)
67 tree scalar_type = gimple_expr_type (stmt);
68 HOST_WIDE_INT lhs, rhs;
70 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
72 if (is_gimple_assign (stmt)
73 && (gimple_assign_cast_p (stmt)
74 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
75 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
77 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
79 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
81 scalar_type = rhs_type;
90 /* Find the place of the data-ref in STMT in the interleaving chain that starts
91 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
94 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
96 gimple next_stmt = first_stmt;
99 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
102 while (next_stmt && next_stmt != stmt)
105 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
115 /* Function vect_insert_into_interleaving_chain.
117 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
120 vect_insert_into_interleaving_chain (struct data_reference *dra,
121 struct data_reference *drb)
125 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
126 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
128 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
129 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
132 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
133 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
136 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
137 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
141 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
144 /* We got to the end of the list. Insert here. */
145 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
146 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
150 /* Function vect_update_interleaving_chain.
152 For two data-refs DRA and DRB that are a part of a chain interleaved data
153 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
155 There are four possible cases:
156 1. New stmts - both DRA and DRB are not a part of any chain:
159 2. DRB is a part of a chain and DRA is not:
160 no need to update FIRST_DR
161 no need to insert DRB
162 insert DRA according to init
163 3. DRA is a part of a chain and DRB is not:
164 if (init of FIRST_DR > init of DRB)
166 NEXT(FIRST_DR) = previous FIRST_DR
168 insert DRB according to its init
169 4. both DRA and DRB are in some interleaving chains:
170 choose the chain with the smallest init of FIRST_DR
171 insert the nodes of the second chain into the first one. */
174 vect_update_interleaving_chain (struct data_reference *drb,
175 struct data_reference *dra)
177 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
178 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
179 tree next_init, init_dra_chain, init_drb_chain;
180 gimple first_a, first_b;
182 gimple node, prev, next, first_stmt;
184 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
185 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
187 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
188 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
189 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
193 /* 2. DRB is a part of a chain and DRA is not. */
194 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
196 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
197 /* Insert DRA into the chain of DRB. */
198 vect_insert_into_interleaving_chain (dra, drb);
202 /* 3. DRA is a part of a chain and DRB is not. */
203 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
205 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
206 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
210 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
212 /* DRB's init is smaller than the init of the stmt previously marked
213 as the first stmt of the interleaving chain of DRA. Therefore, we
214 update FIRST_STMT and put DRB in the head of the list. */
215 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
216 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
218 /* Update all the stmts in the list to point to the new FIRST_STMT. */
219 tmp = old_first_stmt;
222 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
223 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
228 /* Insert DRB in the list of DRA. */
229 vect_insert_into_interleaving_chain (drb, dra);
230 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
235 /* 4. both DRA and DRB are in some interleaving chains. */
236 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
237 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
238 if (first_a == first_b)
240 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
241 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
243 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
245 /* Insert the nodes of DRA chain into the DRB chain.
246 After inserting a node, continue from this node of the DRB chain (don't
247 start from the beginning. */
248 node = DR_GROUP_FIRST_DR (stmtinfo_a);
249 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
250 first_stmt = first_b;
254 /* Insert the nodes of DRB chain into the DRA chain.
255 After inserting a node, continue from this node of the DRA chain (don't
256 start from the beginning. */
257 node = DR_GROUP_FIRST_DR (stmtinfo_b);
258 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
259 first_stmt = first_a;
264 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
265 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
268 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
269 if (tree_int_cst_compare (next_init, node_init) > 0)
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
273 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
278 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
282 /* We got to the end of the list. Insert here. */
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
284 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
287 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
288 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
293 /* Function vect_equal_offsets.
295 Check if OFFSET1 and OFFSET2 are identical expressions. */
298 vect_equal_offsets (tree offset1, tree offset2)
302 STRIP_NOPS (offset1);
303 STRIP_NOPS (offset2);
305 if (offset1 == offset2)
308 if (TREE_CODE (offset1) != TREE_CODE (offset2)
309 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
312 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
313 TREE_OPERAND (offset2, 0));
315 if (!res || !BINARY_CLASS_P (offset1))
318 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
319 TREE_OPERAND (offset2, 1));
325 /* Check dependence between DRA and DRB for basic block vectorization.
326 If the accesses share same bases and offsets, we can compare their initial
327 constant offsets to decide whether they differ or not. In case of a read-
328 write dependence we check that the load is before the store to ensure that
329 vectorization will not change the order of the accesses. */
332 vect_drs_dependent_in_basic_block (struct data_reference *dra,
333 struct data_reference *drb)
335 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
338 /* We only call this function for pairs of loads and stores, but we verify
340 if (DR_IS_READ (dra) == DR_IS_READ (drb))
342 if (DR_IS_READ (dra))
348 /* Check that the data-refs have same bases and offsets. If not, we can't
349 determine if they are dependent. */
350 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
351 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
352 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
353 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
354 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
355 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
358 /* Check the types. */
359 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
360 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
362 if (type_size_a != type_size_b
363 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
364 TREE_TYPE (DR_REF (drb))))
367 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
368 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
370 /* Two different locations - no dependence. */
371 if (init_a != init_b)
374 /* We have a read-write dependence. Check that the load is before the store.
375 When we vectorize basic blocks, vector load can be only before
376 corresponding scalar load, and vector store can be only after its
377 corresponding scalar store. So the order of the acceses is preserved in
378 case the load is before the store. */
379 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
380 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
387 /* Function vect_check_interleaving.
389 Check if DRA and DRB are a part of interleaving. In case they are, insert
390 DRA and DRB in an interleaving chain. */
393 vect_check_interleaving (struct data_reference *dra,
394 struct data_reference *drb)
396 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
398 /* Check that the data-refs have same first location (except init) and they
399 are both either store or load (not load and store). */
400 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
401 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
402 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
403 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
404 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
405 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
406 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
407 || DR_IS_READ (dra) != DR_IS_READ (drb))
411 1. data-refs are of the same type
412 2. their steps are equal
413 3. the step (if greater than zero) is greater than the difference between
415 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
416 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
418 if (type_size_a != type_size_b
419 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
420 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
421 TREE_TYPE (DR_REF (drb))))
424 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
425 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
426 step = TREE_INT_CST_LOW (DR_STEP (dra));
430 /* If init_a == init_b + the size of the type * k, we have an interleaving,
431 and DRB is accessed before DRA. */
432 diff_mod_size = (init_a - init_b) % type_size_a;
434 if (step && (init_a - init_b) > step)
437 if (diff_mod_size == 0)
439 vect_update_interleaving_chain (drb, dra);
440 if (vect_print_dump_info (REPORT_DR_DETAILS))
442 fprintf (vect_dump, "Detected interleaving ");
443 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
444 fprintf (vect_dump, " and ");
445 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
452 /* If init_b == init_a + the size of the type * k, we have an
453 interleaving, and DRA is accessed before DRB. */
454 diff_mod_size = (init_b - init_a) % type_size_a;
456 if (step && (init_b - init_a) > step)
459 if (diff_mod_size == 0)
461 vect_update_interleaving_chain (dra, drb);
462 if (vect_print_dump_info (REPORT_DR_DETAILS))
464 fprintf (vect_dump, "Detected interleaving ");
465 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
466 fprintf (vect_dump, " and ");
467 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
476 /* Check if data references pointed by DR_I and DR_J are same or
477 belong to same interleaving group. Return FALSE if drs are
478 different, otherwise return TRUE. */
481 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
483 gimple stmt_i = DR_STMT (dr_i);
484 gimple stmt_j = DR_STMT (dr_j);
486 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
487 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
488 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
489 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
490 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
496 /* If address ranges represented by DDR_I and DDR_J are equal,
497 return TRUE, otherwise return FALSE. */
500 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
502 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
504 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
505 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
511 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
512 tested at run-time. Return TRUE if DDR was successfully inserted.
513 Return false if versioning is not supported. */
516 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
518 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
520 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
523 if (vect_print_dump_info (REPORT_DR_DETAILS))
525 fprintf (vect_dump, "mark for run-time aliasing test between ");
526 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
527 fprintf (vect_dump, " and ");
528 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
531 if (optimize_loop_nest_for_size_p (loop))
533 if (vect_print_dump_info (REPORT_DR_DETAILS))
534 fprintf (vect_dump, "versioning not supported when optimizing for size.");
538 /* FORNOW: We don't support versioning with outer-loop vectorization. */
541 if (vect_print_dump_info (REPORT_DR_DETAILS))
542 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
546 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
551 /* Function vect_analyze_data_ref_dependence.
553 Return TRUE if there (might) exist a dependence between a memory-reference
554 DRA and a memory-reference DRB. When versioning for alias may check a
555 dependence at run-time, return FALSE. Adjust *MAX_VF according to
556 the data dependence. */
559 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
560 loop_vec_info loop_vinfo, int *max_vf,
561 bool *data_dependence_in_bb)
564 struct loop *loop = NULL;
565 struct data_reference *dra = DDR_A (ddr);
566 struct data_reference *drb = DDR_B (ddr);
567 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
568 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
569 lambda_vector dist_v;
570 unsigned int loop_depth;
572 /* Don't bother to analyze statements marked as unvectorizable. */
573 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
574 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
577 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
579 /* Independent data accesses. */
580 vect_check_interleaving (dra, drb);
585 loop = LOOP_VINFO_LOOP (loop_vinfo);
587 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
590 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
594 if (vect_print_dump_info (REPORT_DR_DETAILS))
596 fprintf (vect_dump, "versioning for alias required: "
597 "can't determine dependence between ");
598 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
599 fprintf (vect_dump, " and ");
600 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
603 /* Add to list of ddrs that need to be tested at run-time. */
604 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
607 /* When vectorizing a basic block unknown depnedence can still mean
609 if (vect_check_interleaving (dra, drb))
612 if (vect_print_dump_info (REPORT_DR_DETAILS))
614 fprintf (vect_dump, "can't determine dependence between ");
615 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
616 fprintf (vect_dump, " and ");
617 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
620 /* We do not vectorize basic blocks with write-write dependencies. */
621 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
624 /* We deal with read-write dependencies in basic blocks later (by
625 verifying that all the loads in the basic block are before all the
627 *data_dependence_in_bb = true;
631 /* Versioning for alias is not yet supported for basic block SLP, and
632 dependence distance is unapplicable, hence, in case of known data
633 dependence, basic block vectorization is impossible for now. */
636 if (dra != drb && vect_check_interleaving (dra, drb))
639 if (vect_print_dump_info (REPORT_DR_DETAILS))
641 fprintf (vect_dump, "determined dependence between ");
642 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
643 fprintf (vect_dump, " and ");
644 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
647 /* Do not vectorize basic blcoks with write-write dependences. */
648 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
651 /* Check if this dependence is allowed in basic block vectorization. */
652 return vect_drs_dependent_in_basic_block (dra, drb);
655 /* Loop-based vectorization and known data dependence. */
656 if (DDR_NUM_DIST_VECTS (ddr) == 0)
658 if (vect_print_dump_info (REPORT_DR_DETAILS))
660 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
661 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
662 fprintf (vect_dump, " and ");
663 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
665 /* Add to list of ddrs that need to be tested at run-time. */
666 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
669 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
670 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
672 int dist = dist_v[loop_depth];
674 if (vect_print_dump_info (REPORT_DR_DETAILS))
675 fprintf (vect_dump, "dependence distance = %d.", dist);
679 if (vect_print_dump_info (REPORT_DR_DETAILS))
681 fprintf (vect_dump, "dependence distance == 0 between ");
682 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
683 fprintf (vect_dump, " and ");
684 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
687 /* For interleaving, mark that there is a read-write dependency if
688 necessary. We check before that one of the data-refs is store. */
689 if (DR_IS_READ (dra))
690 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
693 if (DR_IS_READ (drb))
694 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
700 if (dist > 0 && DDR_REVERSED_P (ddr))
702 /* If DDR_REVERSED_P the order of the data-refs in DDR was
703 reversed (to make distance vector positive), and the actual
704 distance is negative. */
705 if (vect_print_dump_info (REPORT_DR_DETAILS))
706 fprintf (vect_dump, "dependence distance negative.");
711 && abs (dist) < *max_vf)
713 /* The dependence distance requires reduction of the maximal
714 vectorization factor. */
715 *max_vf = abs (dist);
716 if (vect_print_dump_info (REPORT_DR_DETAILS))
717 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
721 if (abs (dist) >= *max_vf)
723 /* Dependence distance does not create dependence, as far as
724 vectorization is concerned, in this case. */
725 if (vect_print_dump_info (REPORT_DR_DETAILS))
726 fprintf (vect_dump, "dependence distance >= VF.");
730 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
732 fprintf (vect_dump, "not vectorized, possible dependence "
733 "between data-refs ");
734 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
735 fprintf (vect_dump, " and ");
736 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
745 /* Function vect_analyze_data_ref_dependences.
747 Examine all the data references in the loop, and make sure there do not
748 exist any data dependences between them. Set *MAX_VF according to
749 the maximum vectorization factor the data dependences allow. */
752 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
753 bb_vec_info bb_vinfo, int *max_vf,
754 bool *data_dependence_in_bb)
757 VEC (ddr_p, heap) *ddrs = NULL;
758 struct data_dependence_relation *ddr;
760 if (vect_print_dump_info (REPORT_DETAILS))
761 fprintf (vect_dump, "=== vect_analyze_dependences ===");
764 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
766 ddrs = BB_VINFO_DDRS (bb_vinfo);
768 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
769 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
770 data_dependence_in_bb))
777 /* Function vect_compute_data_ref_alignment
779 Compute the misalignment of the data reference DR.
782 1. If during the misalignment computation it is found that the data reference
783 cannot be vectorized then false is returned.
784 2. DR_MISALIGNMENT (DR) is defined.
786 FOR NOW: No analysis is actually performed. Misalignment is calculated
787 only for trivial cases. TODO. */
790 vect_compute_data_ref_alignment (struct data_reference *dr)
792 gimple stmt = DR_STMT (dr);
793 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
794 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
795 struct loop *loop = NULL;
796 tree ref = DR_REF (dr);
798 tree base, base_addr;
801 tree aligned_to, alignment;
803 if (vect_print_dump_info (REPORT_DETAILS))
804 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
807 loop = LOOP_VINFO_LOOP (loop_vinfo);
809 /* Initialize misalignment to unknown. */
810 SET_DR_MISALIGNMENT (dr, -1);
812 misalign = DR_INIT (dr);
813 aligned_to = DR_ALIGNED_TO (dr);
814 base_addr = DR_BASE_ADDRESS (dr);
815 vectype = STMT_VINFO_VECTYPE (stmt_info);
817 /* In case the dataref is in an inner-loop of the loop that is being
818 vectorized (LOOP), we use the base and misalignment information
819 relative to the outer-loop (LOOP). This is ok only if the misalignment
820 stays the same throughout the execution of the inner-loop, which is why
821 we have to check that the stride of the dataref in the inner-loop evenly
822 divides by the vector size. */
823 if (loop && nested_in_vect_loop_p (loop, stmt))
825 tree step = DR_STEP (dr);
826 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
828 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
830 if (vect_print_dump_info (REPORT_ALIGNMENT))
831 fprintf (vect_dump, "inner step divides the vector-size.");
832 misalign = STMT_VINFO_DR_INIT (stmt_info);
833 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
834 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
838 if (vect_print_dump_info (REPORT_ALIGNMENT))
839 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
840 misalign = NULL_TREE;
844 base = build_fold_indirect_ref (base_addr);
845 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
847 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
850 if (vect_print_dump_info (REPORT_ALIGNMENT))
852 fprintf (vect_dump, "Unknown alignment for access: ");
853 print_generic_expr (vect_dump, base, TDF_SLIM);
859 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
861 || (TREE_CODE (base_addr) == SSA_NAME
862 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
863 TREE_TYPE (base_addr)))),
867 base_aligned = false;
871 /* Do not change the alignment of global variables if
872 flag_section_anchors is enabled. */
873 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
874 || (TREE_STATIC (base) && flag_section_anchors))
876 if (vect_print_dump_info (REPORT_DETAILS))
878 fprintf (vect_dump, "can't force alignment of ref: ");
879 print_generic_expr (vect_dump, ref, TDF_SLIM);
884 /* Force the alignment of the decl.
885 NOTE: This is the only change to the code we make during
886 the analysis phase, before deciding to vectorize the loop. */
887 if (vect_print_dump_info (REPORT_DETAILS))
889 fprintf (vect_dump, "force alignment of ");
890 print_generic_expr (vect_dump, ref, TDF_SLIM);
893 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
894 DECL_USER_ALIGN (base) = 1;
897 /* At this point we assume that the base is aligned. */
898 gcc_assert (base_aligned
899 || (TREE_CODE (base) == VAR_DECL
900 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
902 /* If this is a backward running DR then first access in the larger
903 vectype actually is N-1 elements before the address in the DR.
904 Adjust misalign accordingly. */
905 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
907 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
908 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
909 otherwise we wouldn't be here. */
910 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
911 /* PLUS because DR_STEP was negative. */
912 misalign = size_binop (PLUS_EXPR, misalign, offset);
915 /* Modulo alignment. */
916 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
918 if (!host_integerp (misalign, 1))
920 /* Negative or overflowed misalignment value. */
921 if (vect_print_dump_info (REPORT_DETAILS))
922 fprintf (vect_dump, "unexpected misalign value");
926 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
928 if (vect_print_dump_info (REPORT_DETAILS))
930 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
931 print_generic_expr (vect_dump, ref, TDF_SLIM);
938 /* Function vect_compute_data_refs_alignment
940 Compute the misalignment of data references in the loop.
941 Return FALSE if a data reference is found that cannot be vectorized. */
944 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
945 bb_vec_info bb_vinfo)
947 VEC (data_reference_p, heap) *datarefs;
948 struct data_reference *dr;
952 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
954 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
956 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
957 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
958 && !vect_compute_data_ref_alignment (dr))
962 /* Mark unsupported statement as unvectorizable. */
963 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
974 /* Function vect_update_misalignment_for_peel
976 DR - the data reference whose misalignment is to be adjusted.
977 DR_PEEL - the data reference whose misalignment is being made
978 zero in the vector loop by the peel.
979 NPEEL - the number of iterations in the peel loop if the misalignment
980 of DR_PEEL is known at compile time. */
983 vect_update_misalignment_for_peel (struct data_reference *dr,
984 struct data_reference *dr_peel, int npeel)
987 VEC(dr_p,heap) *same_align_drs;
988 struct data_reference *current_dr;
989 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
990 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
991 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
992 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
994 /* For interleaved data accesses the step in the loop must be multiplied by
995 the size of the interleaving group. */
996 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
997 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
998 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
999 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
1001 /* It can be assumed that the data refs with the same alignment as dr_peel
1002 are aligned in the vector loop. */
1004 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1005 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1007 if (current_dr != dr)
1009 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1010 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1011 SET_DR_MISALIGNMENT (dr, 0);
1015 if (known_alignment_for_access_p (dr)
1016 && known_alignment_for_access_p (dr_peel))
1018 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1019 int misal = DR_MISALIGNMENT (dr);
1020 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1021 misal += negative ? -npeel * dr_size : npeel * dr_size;
1022 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1023 SET_DR_MISALIGNMENT (dr, misal);
1027 if (vect_print_dump_info (REPORT_DETAILS))
1028 fprintf (vect_dump, "Setting misalignment to -1.");
1029 SET_DR_MISALIGNMENT (dr, -1);
1033 /* Function vect_verify_datarefs_alignment
1035 Return TRUE if all data references in the loop can be
1036 handled with respect to alignment. */
1039 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1041 VEC (data_reference_p, heap) *datarefs;
1042 struct data_reference *dr;
1043 enum dr_alignment_support supportable_dr_alignment;
1047 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1049 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1051 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1053 gimple stmt = DR_STMT (dr);
1054 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1056 /* For interleaving, only the alignment of the first access matters.
1057 Skip statements marked as not vectorizable. */
1058 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1059 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1060 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1063 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1064 if (!supportable_dr_alignment)
1066 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1068 if (DR_IS_READ (dr))
1070 "not vectorized: unsupported unaligned load.");
1073 "not vectorized: unsupported unaligned store.");
1075 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1079 if (supportable_dr_alignment != dr_aligned
1080 && vect_print_dump_info (REPORT_ALIGNMENT))
1081 fprintf (vect_dump, "Vectorizing an unaligned access.");
1087 /* Function vector_alignment_reachable_p
1089 Return true if vector alignment for DR is reachable by peeling
1090 a few loop iterations. Return false otherwise. */
1093 vector_alignment_reachable_p (struct data_reference *dr)
1095 gimple stmt = DR_STMT (dr);
1096 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1097 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1099 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1101 /* For interleaved access we peel only if number of iterations in
1102 the prolog loop ({VF - misalignment}), is a multiple of the
1103 number of the interleaved accesses. */
1104 int elem_size, mis_in_elements;
1105 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1107 /* FORNOW: handle only known alignment. */
1108 if (!known_alignment_for_access_p (dr))
1111 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1112 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1114 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1118 /* If misalignment is known at the compile time then allow peeling
1119 only if natural alignment is reachable through peeling. */
1120 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1122 HOST_WIDE_INT elmsize =
1123 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1124 if (vect_print_dump_info (REPORT_DETAILS))
1126 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1127 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1129 if (DR_MISALIGNMENT (dr) % elmsize)
1131 if (vect_print_dump_info (REPORT_DETAILS))
1132 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1137 if (!known_alignment_for_access_p (dr))
1139 tree type = (TREE_TYPE (DR_REF (dr)));
1140 tree ba = DR_BASE_OBJECT (dr);
1141 bool is_packed = false;
1144 is_packed = contains_packed_reference (ba);
1146 if (vect_print_dump_info (REPORT_DETAILS))
1147 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1148 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1158 /* Calculate the cost of the memory access represented by DR. */
1161 vect_get_data_access_cost (struct data_reference *dr,
1162 unsigned int *inside_cost,
1163 unsigned int *outside_cost)
1165 gimple stmt = DR_STMT (dr);
1166 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1167 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1168 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1169 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1170 int ncopies = vf / nunits;
1171 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1173 if (!supportable_dr_alignment)
1174 *inside_cost = VECT_MAX_COST;
1177 if (DR_IS_READ (dr))
1178 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1180 vect_get_store_cost (dr, ncopies, inside_cost);
1183 if (vect_print_dump_info (REPORT_COST))
1184 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1185 "outside_cost = %d.", *inside_cost, *outside_cost);
1190 vect_peeling_hash (const void *elem)
1192 const struct _vect_peel_info *peel_info;
1194 peel_info = (const struct _vect_peel_info *) elem;
1195 return (hashval_t) peel_info->npeel;
1200 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1202 const struct _vect_peel_info *a, *b;
1204 a = (const struct _vect_peel_info *) elem1;
1205 b = (const struct _vect_peel_info *) elem2;
1206 return (a->npeel == b->npeel);
1210 /* Insert DR into peeling hash table with NPEEL as key. */
1213 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1216 struct _vect_peel_info elem, *slot;
1218 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1221 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1227 slot = XNEW (struct _vect_peel_info);
1228 slot->npeel = npeel;
1231 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1236 if (!supportable_dr_alignment && !flag_vect_cost_model)
1237 slot->count += VECT_MAX_COST;
1241 /* Traverse peeling hash table to find peeling option that aligns maximum
1242 number of data accesses. */
1245 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1247 vect_peel_info elem = (vect_peel_info) *slot;
1248 vect_peel_extended_info max = (vect_peel_extended_info) data;
1250 if (elem->count > max->peel_info.count)
1252 max->peel_info.npeel = elem->npeel;
1253 max->peel_info.count = elem->count;
1254 max->peel_info.dr = elem->dr;
1261 /* Traverse peeling hash table and calculate cost for each peeling option.
1262 Find the one with the lowest cost. */
1265 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1267 vect_peel_info elem = (vect_peel_info) *slot;
1268 vect_peel_extended_info min = (vect_peel_extended_info) data;
1269 int save_misalignment, dummy;
1270 unsigned int inside_cost = 0, outside_cost = 0, i;
1271 gimple stmt = DR_STMT (elem->dr);
1272 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1273 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1274 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1275 struct data_reference *dr;
1277 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1279 stmt = DR_STMT (dr);
1280 stmt_info = vinfo_for_stmt (stmt);
1281 /* For interleaving, only the alignment of the first access
1283 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1284 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1287 save_misalignment = DR_MISALIGNMENT (dr);
1288 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1289 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1290 SET_DR_MISALIGNMENT (dr, save_misalignment);
1293 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1294 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1296 if (inside_cost < min->inside_cost
1297 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1299 min->inside_cost = inside_cost;
1300 min->outside_cost = outside_cost;
1301 min->peel_info.dr = elem->dr;
1302 min->peel_info.npeel = elem->npeel;
1309 /* Choose best peeling option by traversing peeling hash table and either
1310 choosing an option with the lowest cost (if cost model is enabled) or the
1311 option that aligns as many accesses as possible. */
1313 static struct data_reference *
1314 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1315 unsigned int *npeel)
1317 struct _vect_peel_extended_info res;
1319 res.peel_info.dr = NULL;
1321 if (flag_vect_cost_model)
1323 res.inside_cost = INT_MAX;
1324 res.outside_cost = INT_MAX;
1325 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1326 vect_peeling_hash_get_lowest_cost, &res);
1330 res.peel_info.count = 0;
1331 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1332 vect_peeling_hash_get_most_frequent, &res);
1335 *npeel = res.peel_info.npeel;
1336 return res.peel_info.dr;
1340 /* Function vect_enhance_data_refs_alignment
1342 This pass will use loop versioning and loop peeling in order to enhance
1343 the alignment of data references in the loop.
1345 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1346 original loop is to be vectorized. Any other loops that are created by
1347 the transformations performed in this pass - are not supposed to be
1348 vectorized. This restriction will be relaxed.
1350 This pass will require a cost model to guide it whether to apply peeling
1351 or versioning or a combination of the two. For example, the scheme that
1352 intel uses when given a loop with several memory accesses, is as follows:
1353 choose one memory access ('p') which alignment you want to force by doing
1354 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1355 other accesses are not necessarily aligned, or (2) use loop versioning to
1356 generate one loop in which all accesses are aligned, and another loop in
1357 which only 'p' is necessarily aligned.
1359 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1360 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1361 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1363 Devising a cost model is the most critical aspect of this work. It will
1364 guide us on which access to peel for, whether to use loop versioning, how
1365 many versions to create, etc. The cost model will probably consist of
1366 generic considerations as well as target specific considerations (on
1367 powerpc for example, misaligned stores are more painful than misaligned
1370 Here are the general steps involved in alignment enhancements:
1372 -- original loop, before alignment analysis:
1373 for (i=0; i<N; i++){
1374 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1375 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1378 -- After vect_compute_data_refs_alignment:
1379 for (i=0; i<N; i++){
1380 x = q[i]; # DR_MISALIGNMENT(q) = 3
1381 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1384 -- Possibility 1: we do loop versioning:
1386 for (i=0; i<N; i++){ # loop 1A
1387 x = q[i]; # DR_MISALIGNMENT(q) = 3
1388 p[i] = y; # DR_MISALIGNMENT(p) = 0
1392 for (i=0; i<N; i++){ # loop 1B
1393 x = q[i]; # DR_MISALIGNMENT(q) = 3
1394 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1398 -- Possibility 2: we do loop peeling:
1399 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1403 for (i = 3; i < N; i++){ # loop 2A
1404 x = q[i]; # DR_MISALIGNMENT(q) = 0
1405 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1408 -- Possibility 3: combination of loop peeling and versioning:
1409 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1414 for (i = 3; i<N; i++){ # loop 3A
1415 x = q[i]; # DR_MISALIGNMENT(q) = 0
1416 p[i] = y; # DR_MISALIGNMENT(p) = 0
1420 for (i = 3; i<N; i++){ # loop 3B
1421 x = q[i]; # DR_MISALIGNMENT(q) = 0
1422 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1426 These loops are later passed to loop_transform to be vectorized. The
1427 vectorizer will use the alignment information to guide the transformation
1428 (whether to generate regular loads/stores, or with special handling for
1432 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1434 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1435 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1436 enum dr_alignment_support supportable_dr_alignment;
1437 struct data_reference *dr0 = NULL, *first_store = NULL;
1438 struct data_reference *dr;
1440 bool do_peeling = false;
1441 bool do_versioning = false;
1444 stmt_vec_info stmt_info;
1445 int vect_versioning_for_alias_required;
1446 unsigned int npeel = 0;
1447 bool all_misalignments_unknown = true;
1448 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1449 unsigned possible_npeel_number = 1;
1451 unsigned int nelements, mis, same_align_drs_max = 0;
1453 if (vect_print_dump_info (REPORT_DETAILS))
1454 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1456 /* While cost model enhancements are expected in the future, the high level
1457 view of the code at this time is as follows:
1459 A) If there is a misaligned access then see if peeling to align
1460 this access can make all data references satisfy
1461 vect_supportable_dr_alignment. If so, update data structures
1462 as needed and return true.
1464 B) If peeling wasn't possible and there is a data reference with an
1465 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1466 then see if loop versioning checks can be used to make all data
1467 references satisfy vect_supportable_dr_alignment. If so, update
1468 data structures as needed and return true.
1470 C) If neither peeling nor versioning were successful then return false if
1471 any data reference does not satisfy vect_supportable_dr_alignment.
1473 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1475 Note, Possibility 3 above (which is peeling and versioning together) is not
1476 being done at this time. */
1478 /* (1) Peeling to force alignment. */
1480 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1482 + How many accesses will become aligned due to the peeling
1483 - How many accesses will become unaligned due to the peeling,
1484 and the cost of misaligned accesses.
1485 - The cost of peeling (the extra runtime checks, the increase
1488 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1490 stmt = DR_STMT (dr);
1491 stmt_info = vinfo_for_stmt (stmt);
1493 /* For interleaving, only the alignment of the first access
1495 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1496 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1499 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1500 do_peeling = vector_alignment_reachable_p (dr);
1503 if (known_alignment_for_access_p (dr))
1505 unsigned int npeel_tmp;
1506 bool negative = tree_int_cst_compare (DR_STEP (dr),
1507 size_zero_node) < 0;
1509 /* Save info about DR in the hash table. */
1510 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1511 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1512 htab_create (1, vect_peeling_hash,
1513 vect_peeling_hash_eq, free);
1515 vectype = STMT_VINFO_VECTYPE (stmt_info);
1516 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1517 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1518 TREE_TYPE (DR_REF (dr))));
1519 npeel_tmp = (negative
1520 ? (mis - nelements) : (nelements - mis))
1523 /* For multiple types, it is possible that the bigger type access
1524 will have more than one peeling option. E.g., a loop with two
1525 types: one of size (vector size / 4), and the other one of
1526 size (vector size / 8). Vectorization factor will 8. If both
1527 access are misaligned by 3, the first one needs one scalar
1528 iteration to be aligned, and the second one needs 5. But the
1529 the first one will be aligned also by peeling 5 scalar
1530 iterations, and in that case both accesses will be aligned.
1531 Hence, except for the immediate peeling amount, we also want
1532 to try to add full vector size, while we don't exceed
1533 vectorization factor.
1534 We do this automtically for cost model, since we calculate cost
1535 for every peeling option. */
1536 if (!flag_vect_cost_model)
1537 possible_npeel_number = vf /nelements;
1539 /* Handle the aligned case. We may decide to align some other
1540 access, making DR unaligned. */
1541 if (DR_MISALIGNMENT (dr) == 0)
1544 if (!flag_vect_cost_model)
1545 possible_npeel_number++;
1548 for (j = 0; j < possible_npeel_number; j++)
1550 gcc_assert (npeel_tmp <= vf);
1551 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1552 npeel_tmp += nelements;
1555 all_misalignments_unknown = false;
1556 /* Data-ref that was chosen for the case that all the
1557 misalignments are unknown is not relevant anymore, since we
1558 have a data-ref with known alignment. */
1563 /* If we don't know all the misalignment values, we prefer
1564 peeling for data-ref that has maximum number of data-refs
1565 with the same alignment, unless the target prefers to align
1566 stores over load. */
1567 if (all_misalignments_unknown)
1569 if (same_align_drs_max < VEC_length (dr_p,
1570 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1573 same_align_drs_max = VEC_length (dr_p,
1574 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1578 if (!first_store && DR_IS_WRITE (dr))
1582 /* If there are both known and unknown misaligned accesses in the
1583 loop, we choose peeling amount according to the known
1587 if (!supportable_dr_alignment)
1590 if (!first_store && DR_IS_WRITE (dr))
1597 if (!aligned_access_p (dr))
1599 if (vect_print_dump_info (REPORT_DETAILS))
1600 fprintf (vect_dump, "vector alignment may not be reachable");
1607 vect_versioning_for_alias_required
1608 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1610 /* Temporarily, if versioning for alias is required, we disable peeling
1611 until we support peeling and versioning. Often peeling for alignment
1612 will require peeling for loop-bound, which in turn requires that we
1613 know how to adjust the loop ivs after the loop. */
1614 if (vect_versioning_for_alias_required
1615 || !vect_can_advance_ivs_p (loop_vinfo)
1616 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1619 if (do_peeling && all_misalignments_unknown
1620 && vect_supportable_dr_alignment (dr0, false))
1623 /* Check if the target requires to prefer stores over loads, i.e., if
1624 misaligned stores are more expensive than misaligned loads (taking
1625 drs with same alignment into account). */
1626 if (first_store && DR_IS_READ (dr0))
1628 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1629 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1630 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1631 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1633 vect_get_data_access_cost (dr0, &load_inside_cost,
1634 &load_outside_cost);
1635 vect_get_data_access_cost (first_store, &store_inside_cost,
1636 &store_outside_cost);
1638 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1639 aligning the load DR0). */
1640 load_inside_penalty = store_inside_cost;
1641 load_outside_penalty = store_outside_cost;
1642 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1643 (vinfo_for_stmt (DR_STMT (first_store))),
1646 if (DR_IS_READ (dr))
1648 load_inside_penalty += load_inside_cost;
1649 load_outside_penalty += load_outside_cost;
1653 load_inside_penalty += store_inside_cost;
1654 load_outside_penalty += store_outside_cost;
1657 /* Calculate the penalty for leaving DR0 unaligned (by
1658 aligning the FIRST_STORE). */
1659 store_inside_penalty = load_inside_cost;
1660 store_outside_penalty = load_outside_cost;
1661 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1662 (vinfo_for_stmt (DR_STMT (dr0))),
1665 if (DR_IS_READ (dr))
1667 store_inside_penalty += load_inside_cost;
1668 store_outside_penalty += load_outside_cost;
1672 store_inside_penalty += store_inside_cost;
1673 store_outside_penalty += store_outside_cost;
1676 if (load_inside_penalty > store_inside_penalty
1677 || (load_inside_penalty == store_inside_penalty
1678 && load_outside_penalty > store_outside_penalty))
1682 /* In case there are only loads with different unknown misalignments, use
1683 peeling only if it may help to align other accesses in the loop. */
1684 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1685 (vinfo_for_stmt (DR_STMT (dr0))))
1686 && vect_supportable_dr_alignment (dr0, false)
1687 != dr_unaligned_supported)
1691 if (do_peeling && !dr0)
1693 /* Peeling is possible, but there is no data access that is not supported
1694 unless aligned. So we try to choose the best possible peeling. */
1696 /* We should get here only if there are drs with known misalignment. */
1697 gcc_assert (!all_misalignments_unknown);
1699 /* Choose the best peeling from the hash table. */
1700 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1707 stmt = DR_STMT (dr0);
1708 stmt_info = vinfo_for_stmt (stmt);
1709 vectype = STMT_VINFO_VECTYPE (stmt_info);
1710 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1712 if (known_alignment_for_access_p (dr0))
1714 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1715 size_zero_node) < 0;
1718 /* Since it's known at compile time, compute the number of
1719 iterations in the peeled loop (the peeling factor) for use in
1720 updating DR_MISALIGNMENT values. The peeling factor is the
1721 vectorization factor minus the misalignment as an element
1723 mis = DR_MISALIGNMENT (dr0);
1724 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1725 npeel = ((negative ? mis - nelements : nelements - mis)
1729 /* For interleaved data access every iteration accesses all the
1730 members of the group, therefore we divide the number of iterations
1731 by the group size. */
1732 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1733 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1734 npeel /= DR_GROUP_SIZE (stmt_info);
1736 if (vect_print_dump_info (REPORT_DETAILS))
1737 fprintf (vect_dump, "Try peeling by %d", npeel);
1740 /* Ensure that all data refs can be vectorized after the peel. */
1741 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1743 int save_misalignment;
1748 stmt = DR_STMT (dr);
1749 stmt_info = vinfo_for_stmt (stmt);
1750 /* For interleaving, only the alignment of the first access
1752 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1753 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1756 save_misalignment = DR_MISALIGNMENT (dr);
1757 vect_update_misalignment_for_peel (dr, dr0, npeel);
1758 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1759 SET_DR_MISALIGNMENT (dr, save_misalignment);
1761 if (!supportable_dr_alignment)
1768 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1770 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1779 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1780 If the misalignment of DR_i is identical to that of dr0 then set
1781 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1782 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1783 by the peeling factor times the element size of DR_i (MOD the
1784 vectorization factor times the size). Otherwise, the
1785 misalignment of DR_i must be set to unknown. */
1786 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1788 vect_update_misalignment_for_peel (dr, dr0, npeel);
1790 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1792 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1794 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1795 SET_DR_MISALIGNMENT (dr0, 0);
1796 if (vect_print_dump_info (REPORT_ALIGNMENT))
1797 fprintf (vect_dump, "Alignment of access forced using peeling.");
1799 if (vect_print_dump_info (REPORT_DETAILS))
1800 fprintf (vect_dump, "Peeling for alignment will be applied.");
1802 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1809 /* (2) Versioning to force alignment. */
1811 /* Try versioning if:
1812 1) flag_tree_vect_loop_version is TRUE
1813 2) optimize loop for speed
1814 3) there is at least one unsupported misaligned data ref with an unknown
1816 4) all misaligned data refs with a known misalignment are supported, and
1817 5) the number of runtime alignment checks is within reason. */
1820 flag_tree_vect_loop_version
1821 && optimize_loop_nest_for_speed_p (loop)
1822 && (!loop->inner); /* FORNOW */
1826 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1828 stmt = DR_STMT (dr);
1829 stmt_info = vinfo_for_stmt (stmt);
1831 /* For interleaving, only the alignment of the first access
1833 if (aligned_access_p (dr)
1834 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1835 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1838 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1840 if (!supportable_dr_alignment)
1846 if (known_alignment_for_access_p (dr)
1847 || VEC_length (gimple,
1848 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1849 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1851 do_versioning = false;
1855 stmt = DR_STMT (dr);
1856 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1857 gcc_assert (vectype);
1859 /* The rightmost bits of an aligned address must be zeros.
1860 Construct the mask needed for this test. For example,
1861 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1862 mask must be 15 = 0xf. */
1863 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1865 /* FORNOW: use the same mask to test all potentially unaligned
1866 references in the loop. The vectorizer currently supports
1867 a single vector size, see the reference to
1868 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1869 vectorization factor is computed. */
1870 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1871 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1872 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1873 VEC_safe_push (gimple, heap,
1874 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1879 /* Versioning requires at least one misaligned data reference. */
1880 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1881 do_versioning = false;
1882 else if (!do_versioning)
1883 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1888 VEC(gimple,heap) *may_misalign_stmts
1889 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1892 /* It can now be assumed that the data references in the statements
1893 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1894 of the loop being vectorized. */
1895 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1897 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1898 dr = STMT_VINFO_DATA_REF (stmt_info);
1899 SET_DR_MISALIGNMENT (dr, 0);
1900 if (vect_print_dump_info (REPORT_ALIGNMENT))
1901 fprintf (vect_dump, "Alignment of access forced using versioning.");
1904 if (vect_print_dump_info (REPORT_DETAILS))
1905 fprintf (vect_dump, "Versioning for alignment will be applied.");
1907 /* Peeling and versioning can't be done together at this time. */
1908 gcc_assert (! (do_peeling && do_versioning));
1910 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1915 /* This point is reached if neither peeling nor versioning is being done. */
1916 gcc_assert (! (do_peeling || do_versioning));
1918 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1923 /* Function vect_find_same_alignment_drs.
1925 Update group and alignment relations according to the chosen
1926 vectorization factor. */
1929 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1930 loop_vec_info loop_vinfo)
1933 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1934 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1935 struct data_reference *dra = DDR_A (ddr);
1936 struct data_reference *drb = DDR_B (ddr);
1937 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1938 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1939 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1940 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1941 lambda_vector dist_v;
1942 unsigned int loop_depth;
1944 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1950 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1953 /* Loop-based vectorization and known data dependence. */
1954 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1957 /* Data-dependence analysis reports a distance vector of zero
1958 for data-references that overlap only in the first iteration
1959 but have different sign step (see PR45764).
1960 So as a sanity check require equal DR_STEP. */
1961 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1964 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1965 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1967 int dist = dist_v[loop_depth];
1969 if (vect_print_dump_info (REPORT_DR_DETAILS))
1970 fprintf (vect_dump, "dependence distance = %d.", dist);
1972 /* Same loop iteration. */
1974 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1976 /* Two references with distance zero have the same alignment. */
1977 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1978 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1979 if (vect_print_dump_info (REPORT_ALIGNMENT))
1980 fprintf (vect_dump, "accesses have the same alignment.");
1981 if (vect_print_dump_info (REPORT_DR_DETAILS))
1983 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1984 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1985 fprintf (vect_dump, " and ");
1986 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1993 /* Function vect_analyze_data_refs_alignment
1995 Analyze the alignment of the data-references in the loop.
1996 Return FALSE if a data reference is found that cannot be vectorized. */
1999 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2000 bb_vec_info bb_vinfo)
2002 if (vect_print_dump_info (REPORT_DETAILS))
2003 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2005 /* Mark groups of data references with same alignment using
2006 data dependence information. */
2009 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2010 struct data_dependence_relation *ddr;
2013 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2014 vect_find_same_alignment_drs (ddr, loop_vinfo);
2017 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2019 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2021 "not vectorized: can't calculate alignment for data ref.");
2029 /* Analyze groups of strided accesses: check that DR belongs to a group of
2030 strided accesses of legal size, step, etc. Detect gaps, single element
2031 interleaving, and other special cases. Set strided access info.
2032 Collect groups of strided stores for further use in SLP analysis. */
2035 vect_analyze_group_access (struct data_reference *dr)
2037 tree step = DR_STEP (dr);
2038 tree scalar_type = TREE_TYPE (DR_REF (dr));
2039 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2040 gimple stmt = DR_STMT (dr);
2041 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2043 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2044 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2045 HOST_WIDE_INT stride;
2046 bool slp_impossible = false;
2048 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2049 interleaving group (including gaps). */
2050 stride = dr_step / type_size;
2052 /* Not consecutive access is possible only if it is a part of interleaving. */
2053 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2055 /* Check if it this DR is a part of interleaving, and is a single
2056 element of the group that is accessed in the loop. */
2058 /* Gaps are supported only for loads. STEP must be a multiple of the type
2059 size. The size of the group must be a power of 2. */
2061 && (dr_step % type_size) == 0
2063 && exact_log2 (stride) != -1)
2065 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2066 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2067 if (vect_print_dump_info (REPORT_DR_DETAILS))
2069 fprintf (vect_dump, "Detected single element interleaving ");
2070 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2071 fprintf (vect_dump, " step ");
2072 print_generic_expr (vect_dump, step, TDF_SLIM);
2077 if (vect_print_dump_info (REPORT_DETAILS))
2079 fprintf (vect_dump, "not consecutive access ");
2080 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2085 /* Mark the statement as unvectorizable. */
2086 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2093 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2095 /* First stmt in the interleaving chain. Check the chain. */
2096 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2097 struct data_reference *data_ref = dr;
2098 unsigned int count = 1;
2100 tree prev_init = DR_INIT (data_ref);
2102 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2106 /* Skip same data-refs. In case that two or more stmts share
2107 data-ref (supported only for loads), we vectorize only the first
2108 stmt, and the rest get their vectorized loads from the first
2110 if (!tree_int_cst_compare (DR_INIT (data_ref),
2111 DR_INIT (STMT_VINFO_DATA_REF (
2112 vinfo_for_stmt (next)))))
2114 if (DR_IS_WRITE (data_ref))
2116 if (vect_print_dump_info (REPORT_DETAILS))
2117 fprintf (vect_dump, "Two store stmts share the same dr.");
2121 /* Check that there is no load-store dependencies for this loads
2122 to prevent a case of load-store-load to the same location. */
2123 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2124 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2126 if (vect_print_dump_info (REPORT_DETAILS))
2128 "READ_WRITE dependence in interleaving.");
2132 /* For load use the same data-ref load. */
2133 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2136 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2141 /* Check that all the accesses have the same STEP. */
2142 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2143 if (tree_int_cst_compare (step, next_step))
2145 if (vect_print_dump_info (REPORT_DETAILS))
2146 fprintf (vect_dump, "not consecutive access in interleaving");
2150 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2151 /* Check that the distance between two accesses is equal to the type
2152 size. Otherwise, we have gaps. */
2153 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2154 - TREE_INT_CST_LOW (prev_init)) / type_size;
2157 /* FORNOW: SLP of accesses with gaps is not supported. */
2158 slp_impossible = true;
2159 if (DR_IS_WRITE (data_ref))
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "interleaved store with gaps");
2169 /* Store the gap from the previous member of the group. If there is no
2170 gap in the access, DR_GROUP_GAP is always 1. */
2171 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2173 prev_init = DR_INIT (data_ref);
2174 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2175 /* Count the number of data-refs in the chain. */
2179 /* COUNT is the number of accesses found, we multiply it by the size of
2180 the type to get COUNT_IN_BYTES. */
2181 count_in_bytes = type_size * count;
2183 /* Check that the size of the interleaving (including gaps) is not
2184 greater than STEP. */
2185 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2187 if (vect_print_dump_info (REPORT_DETAILS))
2189 fprintf (vect_dump, "interleaving size is greater than step for ");
2190 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2195 /* Check that the size of the interleaving is equal to STEP for stores,
2196 i.e., that there are no gaps. */
2197 if (dr_step && dr_step != count_in_bytes)
2199 if (DR_IS_READ (dr))
2201 slp_impossible = true;
2202 /* There is a gap after the last load in the group. This gap is a
2203 difference between the stride and the number of elements. When
2204 there is no gap, this difference should be 0. */
2205 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2209 if (vect_print_dump_info (REPORT_DETAILS))
2210 fprintf (vect_dump, "interleaved store with gaps");
2215 /* Check that STEP is a multiple of type size. */
2216 if (dr_step && (dr_step % type_size) != 0)
2218 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "step is not a multiple of type size: step ");
2221 print_generic_expr (vect_dump, step, TDF_SLIM);
2222 fprintf (vect_dump, " size ");
2223 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2229 /* FORNOW: we handle only interleaving that is a power of 2.
2230 We don't fail here if it may be still possible to vectorize the
2231 group using SLP. If not, the size of the group will be checked in
2232 vect_analyze_operations, and the vectorization will fail. */
2233 if (exact_log2 (stride) == -1)
2235 if (vect_print_dump_info (REPORT_DETAILS))
2236 fprintf (vect_dump, "interleaving is not a power of 2");
2245 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2246 if (vect_print_dump_info (REPORT_DETAILS))
2247 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2249 /* SLP: create an SLP data structure for every interleaving group of
2250 stores for further analysis in vect_analyse_slp. */
2251 if (DR_IS_WRITE (dr) && !slp_impossible)
2254 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2257 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2266 /* Analyze the access pattern of the data-reference DR.
2267 In case of non-consecutive accesses call vect_analyze_group_access() to
2268 analyze groups of strided accesses. */
2271 vect_analyze_data_ref_access (struct data_reference *dr)
2273 tree step = DR_STEP (dr);
2274 tree scalar_type = TREE_TYPE (DR_REF (dr));
2275 gimple stmt = DR_STMT (dr);
2276 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2277 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2278 struct loop *loop = NULL;
2279 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2282 loop = LOOP_VINFO_LOOP (loop_vinfo);
2284 if (loop_vinfo && !step)
2286 if (vect_print_dump_info (REPORT_DETAILS))
2287 fprintf (vect_dump, "bad data-ref access in loop");
2291 /* Don't allow invariant accesses in loops. */
2292 if (loop_vinfo && dr_step == 0)
2295 if (loop && nested_in_vect_loop_p (loop, stmt))
2297 /* Interleaved accesses are not yet supported within outer-loop
2298 vectorization for references in the inner-loop. */
2299 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2301 /* For the rest of the analysis we use the outer-loop step. */
2302 step = STMT_VINFO_DR_STEP (stmt_info);
2303 dr_step = TREE_INT_CST_LOW (step);
2307 if (vect_print_dump_info (REPORT_ALIGNMENT))
2308 fprintf (vect_dump, "zero step in outer loop.");
2309 if (DR_IS_READ (dr))
2317 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2319 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2321 /* Mark that it is not interleaving. */
2322 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2326 if (loop && nested_in_vect_loop_p (loop, stmt))
2328 if (vect_print_dump_info (REPORT_ALIGNMENT))
2329 fprintf (vect_dump, "strided access in outer loop.");
2333 /* Not consecutive access - check if it's a part of interleaving group. */
2334 return vect_analyze_group_access (dr);
2338 /* Function vect_analyze_data_ref_accesses.
2340 Analyze the access pattern of all the data references in the loop.
2342 FORNOW: the only access pattern that is considered vectorizable is a
2343 simple step 1 (consecutive) access.
2345 FORNOW: handle only arrays and pointer accesses. */
2348 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2351 VEC (data_reference_p, heap) *datarefs;
2352 struct data_reference *dr;
2354 if (vect_print_dump_info (REPORT_DETAILS))
2355 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2358 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2360 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2362 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2363 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2364 && !vect_analyze_data_ref_access (dr))
2366 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2367 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2371 /* Mark the statement as not vectorizable. */
2372 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2382 /* Function vect_prune_runtime_alias_test_list.
2384 Prune a list of ddrs to be tested at run-time by versioning for alias.
2385 Return FALSE if resulting list of ddrs is longer then allowed by
2386 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2389 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2391 VEC (ddr_p, heap) * ddrs =
2392 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2395 if (vect_print_dump_info (REPORT_DETAILS))
2396 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2398 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2403 ddr_i = VEC_index (ddr_p, ddrs, i);
2406 for (j = 0; j < i; j++)
2408 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2410 if (vect_vfa_range_equal (ddr_i, ddr_j))
2412 if (vect_print_dump_info (REPORT_DR_DETAILS))
2414 fprintf (vect_dump, "found equal ranges ");
2415 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2416 fprintf (vect_dump, ", ");
2417 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2418 fprintf (vect_dump, " and ");
2419 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2420 fprintf (vect_dump, ", ");
2421 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2430 VEC_ordered_remove (ddr_p, ddrs, i);
2436 if (VEC_length (ddr_p, ddrs) >
2437 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2439 if (vect_print_dump_info (REPORT_DR_DETAILS))
2442 "disable versioning for alias - max number of generated "
2443 "checks exceeded.");
2446 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2455 /* Function vect_analyze_data_refs.
2457 Find all the data references in the loop or basic block.
2459 The general structure of the analysis of data refs in the vectorizer is as
2461 1- vect_analyze_data_refs(loop/bb): call
2462 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2463 in the loop/bb and their dependences.
2464 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2465 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2466 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2471 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2472 bb_vec_info bb_vinfo,
2475 struct loop *loop = NULL;
2476 basic_block bb = NULL;
2478 VEC (data_reference_p, heap) *datarefs;
2479 struct data_reference *dr;
2483 if (vect_print_dump_info (REPORT_DETAILS))
2484 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2488 loop = LOOP_VINFO_LOOP (loop_vinfo);
2489 res = compute_data_dependences_for_loop
2490 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2491 &LOOP_VINFO_DDRS (loop_vinfo));
2495 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2496 fprintf (vect_dump, "not vectorized: loop contains function calls"
2497 " or data references that cannot be analyzed");
2501 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2505 bb = BB_VINFO_BB (bb_vinfo);
2506 res = compute_data_dependences_for_bb (bb, true,
2507 &BB_VINFO_DATAREFS (bb_vinfo),
2508 &BB_VINFO_DDRS (bb_vinfo));
2511 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2512 fprintf (vect_dump, "not vectorized: basic block contains function"
2513 " calls or data references that cannot be analyzed");
2517 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2520 /* Go through the data-refs, check that the analysis succeeded. Update
2521 pointer from stmt_vec_info struct to DR and vectype. */
2523 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2526 stmt_vec_info stmt_info;
2527 tree base, offset, init;
2530 if (!dr || !DR_REF (dr))
2532 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2533 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2537 stmt = DR_STMT (dr);
2538 stmt_info = vinfo_for_stmt (stmt);
2540 /* Check that analysis of the data-ref succeeded. */
2541 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2544 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2546 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2547 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2552 /* Mark the statement as not vectorizable. */
2553 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2560 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2562 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2563 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2567 /* Mark the statement as not vectorizable. */
2568 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2575 base = unshare_expr (DR_BASE_ADDRESS (dr));
2576 offset = unshare_expr (DR_OFFSET (dr));
2577 init = unshare_expr (DR_INIT (dr));
2579 if (stmt_could_throw_p (stmt))
2581 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2583 fprintf (vect_dump, "not vectorized: statement can throw an "
2585 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2590 /* Update DR field in stmt_vec_info struct. */
2592 /* If the dataref is in an inner-loop of the loop that is considered for
2593 for vectorization, we also want to analyze the access relative to
2594 the outer-loop (DR contains information only relative to the
2595 inner-most enclosing loop). We do that by building a reference to the
2596 first location accessed by the inner-loop, and analyze it relative to
2598 if (loop && nested_in_vect_loop_p (loop, stmt))
2600 tree outer_step, outer_base, outer_init;
2601 HOST_WIDE_INT pbitsize, pbitpos;
2603 enum machine_mode pmode;
2604 int punsignedp, pvolatilep;
2605 affine_iv base_iv, offset_iv;
2608 /* Build a reference to the first location accessed by the
2609 inner-loop: *(BASE+INIT). (The first location is actually
2610 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2611 tree inner_base = build_fold_indirect_ref
2612 (fold_build2 (POINTER_PLUS_EXPR,
2613 TREE_TYPE (base), base,
2614 fold_convert (sizetype, init)));
2616 if (vect_print_dump_info (REPORT_DETAILS))
2618 fprintf (vect_dump, "analyze in outer-loop: ");
2619 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2622 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2623 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2624 gcc_assert (outer_base != NULL_TREE);
2626 if (pbitpos % BITS_PER_UNIT != 0)
2628 if (vect_print_dump_info (REPORT_DETAILS))
2629 fprintf (vect_dump, "failed: bit offset alignment.\n");
2633 outer_base = build_fold_addr_expr (outer_base);
2634 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2637 if (vect_print_dump_info (REPORT_DETAILS))
2638 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2645 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2653 offset_iv.base = ssize_int (0);
2654 offset_iv.step = ssize_int (0);
2656 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2659 if (vect_print_dump_info (REPORT_DETAILS))
2660 fprintf (vect_dump, "evolution of offset is not affine.\n");
2664 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2665 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2666 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2667 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2668 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2670 outer_step = size_binop (PLUS_EXPR,
2671 fold_convert (ssizetype, base_iv.step),
2672 fold_convert (ssizetype, offset_iv.step));
2674 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2675 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2676 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2677 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2678 STMT_VINFO_DR_OFFSET (stmt_info) =
2679 fold_convert (ssizetype, offset_iv.base);
2680 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2681 size_int (highest_pow2_factor (offset_iv.base));
2683 if (vect_print_dump_info (REPORT_DETAILS))
2685 fprintf (vect_dump, "\touter base_address: ");
2686 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2687 fprintf (vect_dump, "\n\touter offset from base address: ");
2688 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2689 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2690 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2691 fprintf (vect_dump, "\n\touter step: ");
2692 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2693 fprintf (vect_dump, "\n\touter aligned to: ");
2694 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2698 if (STMT_VINFO_DATA_REF (stmt_info))
2700 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2703 "not vectorized: more than one data ref in stmt: ");
2704 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2709 STMT_VINFO_DATA_REF (stmt_info) = dr;
2711 /* Set vectype for STMT. */
2712 scalar_type = TREE_TYPE (DR_REF (dr));
2713 STMT_VINFO_VECTYPE (stmt_info) =
2714 get_vectype_for_scalar_type (scalar_type);
2715 if (!STMT_VINFO_VECTYPE (stmt_info))
2717 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2720 "not vectorized: no vectype for stmt: ");
2721 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2722 fprintf (vect_dump, " scalar_type: ");
2723 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2728 /* Mark the statement as not vectorizable. */
2729 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2736 /* Adjust the minimal vectorization factor according to the
2738 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2747 /* Function vect_get_new_vect_var.
2749 Returns a name for a new variable. The current naming scheme appends the
2750 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2751 the name of vectorizer generated variables, and appends that to NAME if
2755 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2762 case vect_simple_var:
2765 case vect_scalar_var:
2768 case vect_pointer_var:
2777 char* tmp = concat (prefix, name, NULL);
2778 new_vect_var = create_tmp_var (type, tmp);
2782 new_vect_var = create_tmp_var (type, prefix);
2784 /* Mark vector typed variable as a gimple register variable. */
2785 if (TREE_CODE (type) == VECTOR_TYPE)
2786 DECL_GIMPLE_REG_P (new_vect_var) = true;
2788 return new_vect_var;
2792 /* Function vect_create_addr_base_for_vector_ref.
2794 Create an expression that computes the address of the first memory location
2795 that will be accessed for a data reference.
2798 STMT: The statement containing the data reference.
2799 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2800 OFFSET: Optional. If supplied, it is be added to the initial address.
2801 LOOP: Specify relative to which loop-nest should the address be computed.
2802 For example, when the dataref is in an inner-loop nested in an
2803 outer-loop that is now being vectorized, LOOP can be either the
2804 outer-loop, or the inner-loop. The first memory location accessed
2805 by the following dataref ('in' points to short):
2812 if LOOP=i_loop: &in (relative to i_loop)
2813 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2816 1. Return an SSA_NAME whose value is the address of the memory location of
2817 the first vector of the data reference.
2818 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2819 these statement(s) which define the returned SSA_NAME.
2821 FORNOW: We are only handling array accesses with step 1. */
2824 vect_create_addr_base_for_vector_ref (gimple stmt,
2825 gimple_seq *new_stmt_list,
2829 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2830 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2831 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2833 tree data_ref_base_var;
2835 tree addr_base, addr_expr;
2837 gimple_seq seq = NULL;
2838 tree base_offset = unshare_expr (DR_OFFSET (dr));
2839 tree init = unshare_expr (DR_INIT (dr));
2841 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2845 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2847 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2849 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2851 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2852 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2853 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2857 base_name = build_fold_indirect_ref (data_ref_base);
2860 base_offset = ssize_int (0);
2861 init = ssize_int (0);
2862 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2865 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2866 add_referenced_var (data_ref_base_var);
2867 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2869 gimple_seq_add_seq (new_stmt_list, seq);
2871 /* Create base_offset */
2872 base_offset = size_binop (PLUS_EXPR,
2873 fold_convert (sizetype, base_offset),
2874 fold_convert (sizetype, init));
2875 dest = create_tmp_var (sizetype, "base_off");
2876 add_referenced_var (dest);
2877 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2878 gimple_seq_add_seq (new_stmt_list, seq);
2882 tree tmp = create_tmp_var (sizetype, "offset");
2884 add_referenced_var (tmp);
2885 offset = fold_build2 (MULT_EXPR, sizetype,
2886 fold_convert (sizetype, offset), step);
2887 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2888 base_offset, offset);
2889 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2890 gimple_seq_add_seq (new_stmt_list, seq);
2893 /* base + base_offset */
2895 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2896 data_ref_base, base_offset);
2899 addr_base = build1 (ADDR_EXPR,
2900 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2901 unshare_expr (DR_REF (dr)));
2904 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2905 base = get_base_address (DR_REF (dr));
2907 && TREE_CODE (base) == MEM_REF)
2909 = build_qualified_type (vect_ptr_type,
2910 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2912 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2913 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2914 get_name (base_name));
2915 add_referenced_var (addr_expr);
2916 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2917 gimple_seq_add_seq (new_stmt_list, seq);
2919 if (DR_PTR_INFO (dr)
2920 && TREE_CODE (vec_stmt) == SSA_NAME)
2921 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2923 if (vect_print_dump_info (REPORT_DETAILS))
2925 fprintf (vect_dump, "created ");
2926 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2933 /* Function vect_create_data_ref_ptr.
2935 Create a new pointer to vector type (vp), that points to the first location
2936 accessed in the loop by STMT, along with the def-use update chain to
2937 appropriately advance the pointer through the loop iterations. Also set
2938 aliasing information for the pointer. This vector pointer is used by the
2939 callers to this function to create a memory reference expression for vector
2943 1. STMT: a stmt that references memory. Expected to be of the form
2944 GIMPLE_ASSIGN <name, data-ref> or
2945 GIMPLE_ASSIGN <data-ref, name>.
2946 2. AT_LOOP: the loop where the vector memref is to be created.
2947 3. OFFSET (optional): an offset to be added to the initial address accessed
2948 by the data-ref in STMT.
2949 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2950 pointing to the initial address.
2951 5. TYPE: if not NULL indicates the required type of the data-ref.
2954 1. Declare a new ptr to vector_type, and have it point to the base of the
2955 data reference (initial addressed accessed by the data reference).
2956 For example, for vector of type V8HI, the following code is generated:
2959 vp = (v8hi *)initial_address;
2961 if OFFSET is not supplied:
2962 initial_address = &a[init];
2963 if OFFSET is supplied:
2964 initial_address = &a[init + OFFSET];
2966 Return the initial_address in INITIAL_ADDRESS.
2968 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2969 update the pointer in each iteration of the loop.
2971 Return the increment stmt that updates the pointer in PTR_INCR.
2973 3. Set INV_P to true if the access pattern of the data reference in the
2974 vectorized loop is invariant. Set it to false otherwise.
2976 4. Return the pointer. */
2979 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2980 tree offset, tree *initial_address, gimple *ptr_incr,
2981 bool only_init, bool *inv_p)
2984 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2985 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2986 struct loop *loop = NULL;
2987 bool nested_in_vect_loop = false;
2988 struct loop *containing_loop = NULL;
2989 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2994 gimple_seq new_stmt_list = NULL;
2998 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3000 gimple_stmt_iterator incr_gsi;
3003 tree indx_before_incr, indx_after_incr;
3006 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3007 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
3012 loop = LOOP_VINFO_LOOP (loop_vinfo);
3013 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3014 containing_loop = (gimple_bb (stmt))->loop_father;
3015 pe = loop_preheader_edge (loop);
3019 gcc_assert (bb_vinfo);
3024 /* Check the step (evolution) of the load in LOOP, and record
3025 whether it's invariant. */
3026 if (nested_in_vect_loop)
3027 step = STMT_VINFO_DR_STEP (stmt_info);
3029 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3031 if (tree_int_cst_compare (step, size_zero_node) == 0)
3035 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3037 /* Create an expression for the first address accessed by this load
3039 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3041 if (vect_print_dump_info (REPORT_DETAILS))
3043 tree data_ref_base = base_name;
3044 fprintf (vect_dump, "create vector-pointer variable to type: ");
3045 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3046 if (TREE_CODE (data_ref_base) == VAR_DECL
3047 || TREE_CODE (data_ref_base) == ARRAY_REF)
3048 fprintf (vect_dump, " vectorizing an array ref: ");
3049 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3050 fprintf (vect_dump, " vectorizing a record based array ref: ");
3051 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3052 fprintf (vect_dump, " vectorizing a pointer ref: ");
3053 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3056 /* (1) Create the new vector-pointer variable. */
3057 vect_ptr_type = build_pointer_type (vectype);
3058 base = get_base_address (DR_REF (dr));
3060 && TREE_CODE (base) == MEM_REF)
3062 = build_qualified_type (vect_ptr_type,
3063 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3064 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3065 get_name (base_name));
3067 /* Vector types inherit the alias set of their component type by default so
3068 we need to use a ref-all pointer if the data reference does not conflict
3069 with the created vector data reference because it is not addressable. */
3070 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3071 get_alias_set (DR_REF (dr))))
3074 = build_pointer_type_for_mode (vectype,
3075 TYPE_MODE (vect_ptr_type), true);
3076 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3077 get_name (base_name));
3080 /* Likewise for any of the data references in the stmt group. */
3081 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3083 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3086 tree lhs = gimple_assign_lhs (orig_stmt);
3087 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3088 get_alias_set (lhs)))
3091 = build_pointer_type_for_mode (vectype,
3092 TYPE_MODE (vect_ptr_type), true);
3094 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3095 get_name (base_name));
3099 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3104 add_referenced_var (vect_ptr);
3106 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3107 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3108 def-use update cycles for the pointer: one relative to the outer-loop
3109 (LOOP), which is what steps (3) and (4) below do. The other is relative
3110 to the inner-loop (which is the inner-most loop containing the dataref),
3111 and this is done be step (5) below.
3113 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3114 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3115 redundant. Steps (3),(4) create the following:
3118 LOOP: vp1 = phi(vp0,vp2)
3124 If there is an inner-loop nested in loop, then step (5) will also be
3125 applied, and an additional update in the inner-loop will be created:
3128 LOOP: vp1 = phi(vp0,vp2)
3130 inner: vp3 = phi(vp1,vp4)
3131 vp4 = vp3 + inner_step
3137 /* (2) Calculate the initial address the vector-pointer, and set
3138 the vector-pointer to point to it before the loop. */
3140 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3142 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3148 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3149 gcc_assert (!new_bb);
3152 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3155 *initial_address = new_temp;
3157 /* Create: p = (vectype *) initial_base */
3158 if (TREE_CODE (new_temp) != SSA_NAME
3159 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3161 vec_stmt = gimple_build_assign (vect_ptr,
3162 fold_convert (vect_ptr_type, new_temp));
3163 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3164 /* Copy the points-to information if it exists. */
3165 if (DR_PTR_INFO (dr))
3166 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3167 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3170 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3171 gcc_assert (!new_bb);
3174 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3177 vect_ptr_init = new_temp;
3179 /* (3) Handle the updating of the vector-pointer inside the loop.
3180 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3181 inner-loop nested in LOOP (during outer-loop vectorization). */
3183 /* No update in loop is required. */
3184 if (only_init && (!loop_vinfo || at_loop == loop))
3185 vptr = vect_ptr_init;
3188 /* The step of the vector pointer is the Vector Size. */
3189 tree step = TYPE_SIZE_UNIT (vectype);
3190 /* One exception to the above is when the scalar step of the load in
3191 LOOP is zero. In this case the step here is also zero. */
3193 step = size_zero_node;
3195 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3197 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3199 create_iv (vect_ptr_init,
3200 fold_convert (vect_ptr_type, step),
3201 vect_ptr, loop, &incr_gsi, insert_after,
3202 &indx_before_incr, &indx_after_incr);
3203 incr = gsi_stmt (incr_gsi);
3204 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3206 /* Copy the points-to information if it exists. */
3207 if (DR_PTR_INFO (dr))
3209 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3210 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3215 vptr = indx_before_incr;
3218 if (!nested_in_vect_loop || only_init)
3222 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3223 nested in LOOP, if exists. */
3225 gcc_assert (nested_in_vect_loop);
3228 standard_iv_increment_position (containing_loop, &incr_gsi,
3230 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3231 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3233 incr = gsi_stmt (incr_gsi);
3234 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3236 /* Copy the points-to information if it exists. */
3237 if (DR_PTR_INFO (dr))
3239 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3240 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3245 return indx_before_incr;
3252 /* Function bump_vector_ptr
3254 Increment a pointer (to a vector type) by vector-size. If requested,
3255 i.e. if PTR-INCR is given, then also connect the new increment stmt
3256 to the existing def-use update-chain of the pointer, by modifying
3257 the PTR_INCR as illustrated below:
3259 The pointer def-use update-chain before this function:
3260 DATAREF_PTR = phi (p_0, p_2)
3262 PTR_INCR: p_2 = DATAREF_PTR + step
3264 The pointer def-use update-chain after this function:
3265 DATAREF_PTR = phi (p_0, p_2)
3267 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3269 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3272 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3274 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3275 the loop. The increment amount across iterations is expected
3277 BSI - location where the new update stmt is to be placed.
3278 STMT - the original scalar memory-access stmt that is being vectorized.
3279 BUMP - optional. The offset by which to bump the pointer. If not given,
3280 the offset is assumed to be vector_size.
3282 Output: Return NEW_DATAREF_PTR as illustrated above.
3287 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3288 gimple stmt, tree bump)
3290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3291 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3292 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3293 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3294 tree update = TYPE_SIZE_UNIT (vectype);
3297 use_operand_p use_p;
3298 tree new_dataref_ptr;
3303 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3304 dataref_ptr, update);
3305 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3306 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3307 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3309 /* Copy the points-to information if it exists. */
3310 if (DR_PTR_INFO (dr))
3311 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3314 return new_dataref_ptr;
3316 /* Update the vector-pointer's cross-iteration increment. */
3317 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3319 tree use = USE_FROM_PTR (use_p);
3321 if (use == dataref_ptr)
3322 SET_USE (use_p, new_dataref_ptr);
3324 gcc_assert (tree_int_cst_compare (use, update) == 0);
3327 return new_dataref_ptr;
3331 /* Function vect_create_destination_var.
3333 Create a new temporary of type VECTYPE. */
3336 vect_create_destination_var (tree scalar_dest, tree vectype)
3339 const char *new_name;
3341 enum vect_var_kind kind;
3343 kind = vectype ? vect_simple_var : vect_scalar_var;
3344 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3346 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3348 new_name = get_name (scalar_dest);
3351 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3352 add_referenced_var (vec_dest);
3357 /* Function vect_strided_store_supported.
3359 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3360 and FALSE otherwise. */
3363 vect_strided_store_supported (tree vectype)
3365 optab interleave_high_optab, interleave_low_optab;
3366 enum machine_mode mode;
3368 mode = TYPE_MODE (vectype);
3370 /* Check that the operation is supported. */
3371 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3372 vectype, optab_default);
3373 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3374 vectype, optab_default);
3375 if (!interleave_high_optab || !interleave_low_optab)
3377 if (vect_print_dump_info (REPORT_DETAILS))
3378 fprintf (vect_dump, "no optab for interleave.");
3382 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3383 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3385 if (vect_print_dump_info (REPORT_DETAILS))
3386 fprintf (vect_dump, "interleave op not supported by target.");
3394 /* Function vect_permute_store_chain.
3396 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3397 a power of 2, generate interleave_high/low stmts to reorder the data
3398 correctly for the stores. Return the final references for stores in
3401 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3402 The input is 4 vectors each containing 8 elements. We assign a number to
3403 each element, the input sequence is:
3405 1st vec: 0 1 2 3 4 5 6 7
3406 2nd vec: 8 9 10 11 12 13 14 15
3407 3rd vec: 16 17 18 19 20 21 22 23
3408 4th vec: 24 25 26 27 28 29 30 31
3410 The output sequence should be:
3412 1st vec: 0 8 16 24 1 9 17 25
3413 2nd vec: 2 10 18 26 3 11 19 27
3414 3rd vec: 4 12 20 28 5 13 21 30
3415 4th vec: 6 14 22 30 7 15 23 31
3417 i.e., we interleave the contents of the four vectors in their order.
3419 We use interleave_high/low instructions to create such output. The input of
3420 each interleave_high/low operation is two vectors:
3423 the even elements of the result vector are obtained left-to-right from the
3424 high/low elements of the first vector. The odd elements of the result are
3425 obtained left-to-right from the high/low elements of the second vector.
3426 The output of interleave_high will be: 0 4 1 5
3427 and of interleave_low: 2 6 3 7
3430 The permutation is done in log LENGTH stages. In each stage interleave_high
3431 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3432 where the first argument is taken from the first half of DR_CHAIN and the
3433 second argument from it's second half.
3436 I1: interleave_high (1st vec, 3rd vec)
3437 I2: interleave_low (1st vec, 3rd vec)
3438 I3: interleave_high (2nd vec, 4th vec)
3439 I4: interleave_low (2nd vec, 4th vec)
3441 The output for the first stage is:
3443 I1: 0 16 1 17 2 18 3 19
3444 I2: 4 20 5 21 6 22 7 23
3445 I3: 8 24 9 25 10 26 11 27
3446 I4: 12 28 13 29 14 30 15 31
3448 The output of the second stage, i.e. the final result is:
3450 I1: 0 8 16 24 1 9 17 25
3451 I2: 2 10 18 26 3 11 19 27
3452 I3: 4 12 20 28 5 13 21 30
3453 I4: 6 14 22 30 7 15 23 31. */
3456 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3457 unsigned int length,
3459 gimple_stmt_iterator *gsi,
3460 VEC(tree,heap) **result_chain)
3462 tree perm_dest, vect1, vect2, high, low;
3464 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3467 enum tree_code high_code, low_code;
3469 /* Check that the operation is supported. */
3470 if (!vect_strided_store_supported (vectype))
3473 *result_chain = VEC_copy (tree, heap, dr_chain);
3475 for (i = 0; i < exact_log2 (length); i++)
3477 for (j = 0; j < length/2; j++)
3479 vect1 = VEC_index (tree, dr_chain, j);
3480 vect2 = VEC_index (tree, dr_chain, j+length/2);
3482 /* Create interleaving stmt:
3483 in the case of big endian:
3484 high = interleave_high (vect1, vect2)
3485 and in the case of little endian:
3486 high = interleave_low (vect1, vect2). */
3487 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3488 DECL_GIMPLE_REG_P (perm_dest) = 1;
3489 add_referenced_var (perm_dest);
3490 if (BYTES_BIG_ENDIAN)
3492 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3493 low_code = VEC_INTERLEAVE_LOW_EXPR;
3497 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3498 high_code = VEC_INTERLEAVE_LOW_EXPR;
3500 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3502 high = make_ssa_name (perm_dest, perm_stmt);
3503 gimple_assign_set_lhs (perm_stmt, high);
3504 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3505 VEC_replace (tree, *result_chain, 2*j, high);
3507 /* Create interleaving stmt:
3508 in the case of big endian:
3509 low = interleave_low (vect1, vect2)
3510 and in the case of little endian:
3511 low = interleave_high (vect1, vect2). */
3512 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3513 DECL_GIMPLE_REG_P (perm_dest) = 1;
3514 add_referenced_var (perm_dest);
3515 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3517 low = make_ssa_name (perm_dest, perm_stmt);
3518 gimple_assign_set_lhs (perm_stmt, low);
3519 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3520 VEC_replace (tree, *result_chain, 2*j+1, low);
3522 dr_chain = VEC_copy (tree, heap, *result_chain);
3527 /* Function vect_setup_realignment
3529 This function is called when vectorizing an unaligned load using
3530 the dr_explicit_realign[_optimized] scheme.
3531 This function generates the following code at the loop prolog:
3534 x msq_init = *(floor(p)); # prolog load
3535 realignment_token = call target_builtin;
3537 x msq = phi (msq_init, ---)
3539 The stmts marked with x are generated only for the case of
3540 dr_explicit_realign_optimized.
3542 The code above sets up a new (vector) pointer, pointing to the first
3543 location accessed by STMT, and a "floor-aligned" load using that pointer.
3544 It also generates code to compute the "realignment-token" (if the relevant
3545 target hook was defined), and creates a phi-node at the loop-header bb
3546 whose arguments are the result of the prolog-load (created by this
3547 function) and the result of a load that takes place in the loop (to be
3548 created by the caller to this function).
3550 For the case of dr_explicit_realign_optimized:
3551 The caller to this function uses the phi-result (msq) to create the
3552 realignment code inside the loop, and sets up the missing phi argument,
3555 msq = phi (msq_init, lsq)
3556 lsq = *(floor(p')); # load in loop
3557 result = realign_load (msq, lsq, realignment_token);
3559 For the case of dr_explicit_realign:
3561 msq = *(floor(p)); # load in loop
3563 lsq = *(floor(p')); # load in loop
3564 result = realign_load (msq, lsq, realignment_token);
3567 STMT - (scalar) load stmt to be vectorized. This load accesses
3568 a memory location that may be unaligned.
3569 BSI - place where new code is to be inserted.
3570 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3574 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3575 target hook, if defined.
3576 Return value - the result of the loop-header phi node. */
3579 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3580 tree *realignment_token,
3581 enum dr_alignment_support alignment_support_scheme,
3583 struct loop **at_loop)
3585 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3586 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3587 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3588 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3589 struct loop *loop = NULL;
3591 tree scalar_dest = gimple_assign_lhs (stmt);
3598 tree msq_init = NULL_TREE;
3601 tree msq = NULL_TREE;
3602 gimple_seq stmts = NULL;
3604 bool compute_in_loop = false;
3605 bool nested_in_vect_loop = false;
3606 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3607 struct loop *loop_for_initial_load = NULL;
3611 loop = LOOP_VINFO_LOOP (loop_vinfo);
3612 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3615 gcc_assert (alignment_support_scheme == dr_explicit_realign
3616 || alignment_support_scheme == dr_explicit_realign_optimized);
3618 /* We need to generate three things:
3619 1. the misalignment computation
3620 2. the extra vector load (for the optimized realignment scheme).
3621 3. the phi node for the two vectors from which the realignment is
3622 done (for the optimized realignment scheme). */
3624 /* 1. Determine where to generate the misalignment computation.
3626 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3627 calculation will be generated by this function, outside the loop (in the
3628 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3629 caller, inside the loop.
3631 Background: If the misalignment remains fixed throughout the iterations of
3632 the loop, then both realignment schemes are applicable, and also the
3633 misalignment computation can be done outside LOOP. This is because we are
3634 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3635 are a multiple of VS (the Vector Size), and therefore the misalignment in
3636 different vectorized LOOP iterations is always the same.
3637 The problem arises only if the memory access is in an inner-loop nested
3638 inside LOOP, which is now being vectorized using outer-loop vectorization.
3639 This is the only case when the misalignment of the memory access may not
3640 remain fixed throughout the iterations of the inner-loop (as explained in
3641 detail in vect_supportable_dr_alignment). In this case, not only is the
3642 optimized realignment scheme not applicable, but also the misalignment
3643 computation (and generation of the realignment token that is passed to
3644 REALIGN_LOAD) have to be done inside the loop.
3646 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3647 or not, which in turn determines if the misalignment is computed inside
3648 the inner-loop, or outside LOOP. */
3650 if (init_addr != NULL_TREE || !loop_vinfo)
3652 compute_in_loop = true;
3653 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3657 /* 2. Determine where to generate the extra vector load.
3659 For the optimized realignment scheme, instead of generating two vector
3660 loads in each iteration, we generate a single extra vector load in the
3661 preheader of the loop, and in each iteration reuse the result of the
3662 vector load from the previous iteration. In case the memory access is in
3663 an inner-loop nested inside LOOP, which is now being vectorized using
3664 outer-loop vectorization, we need to determine whether this initial vector
3665 load should be generated at the preheader of the inner-loop, or can be
3666 generated at the preheader of LOOP. If the memory access has no evolution
3667 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3668 to be generated inside LOOP (in the preheader of the inner-loop). */
3670 if (nested_in_vect_loop)
3672 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3673 bool invariant_in_outerloop =
3674 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3675 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3678 loop_for_initial_load = loop;
3680 *at_loop = loop_for_initial_load;
3682 if (loop_for_initial_load)
3683 pe = loop_preheader_edge (loop_for_initial_load);
3685 /* 3. For the case of the optimized realignment, create the first vector
3686 load at the loop preheader. */
3688 if (alignment_support_scheme == dr_explicit_realign_optimized)
3690 /* Create msq_init = *(floor(p1)) in the loop preheader */
3692 gcc_assert (!compute_in_loop);
3693 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3694 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3695 &init_addr, &inc, true, &inv_p);
3696 new_stmt = gimple_build_assign_with_ops
3697 (BIT_AND_EXPR, NULL_TREE, ptr,
3698 build_int_cst (TREE_TYPE (ptr),
3699 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3700 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3701 gimple_assign_set_lhs (new_stmt, new_temp);
3702 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3703 gcc_assert (!new_bb);
3705 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3706 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3707 new_stmt = gimple_build_assign (vec_dest, data_ref);
3708 new_temp = make_ssa_name (vec_dest, new_stmt);
3709 gimple_assign_set_lhs (new_stmt, new_temp);
3710 mark_symbols_for_renaming (new_stmt);
3713 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3714 gcc_assert (!new_bb);
3717 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3719 msq_init = gimple_assign_lhs (new_stmt);
3722 /* 4. Create realignment token using a target builtin, if available.
3723 It is done either inside the containing loop, or before LOOP (as
3724 determined above). */
3726 if (targetm.vectorize.builtin_mask_for_load)
3730 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3733 /* Generate the INIT_ADDR computation outside LOOP. */
3734 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3738 pe = loop_preheader_edge (loop);
3739 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3740 gcc_assert (!new_bb);
3743 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3746 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3747 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3749 vect_create_destination_var (scalar_dest,
3750 gimple_call_return_type (new_stmt));
3751 new_temp = make_ssa_name (vec_dest, new_stmt);
3752 gimple_call_set_lhs (new_stmt, new_temp);
3754 if (compute_in_loop)
3755 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3758 /* Generate the misalignment computation outside LOOP. */
3759 pe = loop_preheader_edge (loop);
3760 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3761 gcc_assert (!new_bb);
3764 *realignment_token = gimple_call_lhs (new_stmt);
3766 /* The result of the CALL_EXPR to this builtin is determined from
3767 the value of the parameter and no global variables are touched
3768 which makes the builtin a "const" function. Requiring the
3769 builtin to have the "const" attribute makes it unnecessary
3770 to call mark_call_clobbered. */
3771 gcc_assert (TREE_READONLY (builtin_decl));
3774 if (alignment_support_scheme == dr_explicit_realign)
3777 gcc_assert (!compute_in_loop);
3778 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3781 /* 5. Create msq = phi <msq_init, lsq> in loop */
3783 pe = loop_preheader_edge (containing_loop);
3784 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3785 msq = make_ssa_name (vec_dest, NULL);
3786 phi_stmt = create_phi_node (msq, containing_loop->header);
3787 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3788 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3794 /* Function vect_strided_load_supported.
3796 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3797 and FALSE otherwise. */
3800 vect_strided_load_supported (tree vectype)
3802 optab perm_even_optab, perm_odd_optab;
3803 enum machine_mode mode;
3805 mode = TYPE_MODE (vectype);
3807 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3809 if (!perm_even_optab)
3811 if (vect_print_dump_info (REPORT_DETAILS))
3812 fprintf (vect_dump, "no optab for perm_even.");
3816 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3818 if (vect_print_dump_info (REPORT_DETAILS))
3819 fprintf (vect_dump, "perm_even op not supported by target.");
3823 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3825 if (!perm_odd_optab)
3827 if (vect_print_dump_info (REPORT_DETAILS))
3828 fprintf (vect_dump, "no optab for perm_odd.");
3832 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3834 if (vect_print_dump_info (REPORT_DETAILS))
3835 fprintf (vect_dump, "perm_odd op not supported by target.");
3842 /* Function vect_permute_load_chain.
3844 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3845 a power of 2, generate extract_even/odd stmts to reorder the input data
3846 correctly. Return the final references for loads in RESULT_CHAIN.
3848 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3849 The input is 4 vectors each containing 8 elements. We assign a number to each
3850 element, the input sequence is:
3852 1st vec: 0 1 2 3 4 5 6 7
3853 2nd vec: 8 9 10 11 12 13 14 15
3854 3rd vec: 16 17 18 19 20 21 22 23
3855 4th vec: 24 25 26 27 28 29 30 31
3857 The output sequence should be:
3859 1st vec: 0 4 8 12 16 20 24 28
3860 2nd vec: 1 5 9 13 17 21 25 29
3861 3rd vec: 2 6 10 14 18 22 26 30
3862 4th vec: 3 7 11 15 19 23 27 31
3864 i.e., the first output vector should contain the first elements of each
3865 interleaving group, etc.
3867 We use extract_even/odd instructions to create such output. The input of
3868 each extract_even/odd operation is two vectors
3872 and the output is the vector of extracted even/odd elements. The output of
3873 extract_even will be: 0 2 4 6
3874 and of extract_odd: 1 3 5 7
3877 The permutation is done in log LENGTH stages. In each stage extract_even
3878 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3879 their order. In our example,
3881 E1: extract_even (1st vec, 2nd vec)
3882 E2: extract_odd (1st vec, 2nd vec)
3883 E3: extract_even (3rd vec, 4th vec)
3884 E4: extract_odd (3rd vec, 4th vec)
3886 The output for the first stage will be:
3888 E1: 0 2 4 6 8 10 12 14
3889 E2: 1 3 5 7 9 11 13 15
3890 E3: 16 18 20 22 24 26 28 30
3891 E4: 17 19 21 23 25 27 29 31
3893 In order to proceed and create the correct sequence for the next stage (or
3894 for the correct output, if the second stage is the last one, as in our
3895 example), we first put the output of extract_even operation and then the
3896 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3897 The input for the second stage is:
3899 1st vec (E1): 0 2 4 6 8 10 12 14
3900 2nd vec (E3): 16 18 20 22 24 26 28 30
3901 3rd vec (E2): 1 3 5 7 9 11 13 15
3902 4th vec (E4): 17 19 21 23 25 27 29 31
3904 The output of the second stage:
3906 E1: 0 4 8 12 16 20 24 28
3907 E2: 2 6 10 14 18 22 26 30
3908 E3: 1 5 9 13 17 21 25 29
3909 E4: 3 7 11 15 19 23 27 31
3911 And RESULT_CHAIN after reordering:
3913 1st vec (E1): 0 4 8 12 16 20 24 28
3914 2nd vec (E3): 1 5 9 13 17 21 25 29
3915 3rd vec (E2): 2 6 10 14 18 22 26 30
3916 4th vec (E4): 3 7 11 15 19 23 27 31. */
3919 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3920 unsigned int length,
3922 gimple_stmt_iterator *gsi,
3923 VEC(tree,heap) **result_chain)
3925 tree perm_dest, data_ref, first_vect, second_vect;
3927 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3931 /* Check that the operation is supported. */
3932 if (!vect_strided_load_supported (vectype))
3935 *result_chain = VEC_copy (tree, heap, dr_chain);
3936 for (i = 0; i < exact_log2 (length); i++)
3938 for (j = 0; j < length; j +=2)
3940 first_vect = VEC_index (tree, dr_chain, j);
3941 second_vect = VEC_index (tree, dr_chain, j+1);
3943 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3944 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3945 DECL_GIMPLE_REG_P (perm_dest) = 1;
3946 add_referenced_var (perm_dest);
3948 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3949 perm_dest, first_vect,
3952 data_ref = make_ssa_name (perm_dest, perm_stmt);
3953 gimple_assign_set_lhs (perm_stmt, data_ref);
3954 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3955 mark_symbols_for_renaming (perm_stmt);
3957 VEC_replace (tree, *result_chain, j/2, data_ref);
3959 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3960 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3961 DECL_GIMPLE_REG_P (perm_dest) = 1;
3962 add_referenced_var (perm_dest);
3964 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3965 perm_dest, first_vect,
3967 data_ref = make_ssa_name (perm_dest, perm_stmt);
3968 gimple_assign_set_lhs (perm_stmt, data_ref);
3969 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3970 mark_symbols_for_renaming (perm_stmt);
3972 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3974 dr_chain = VEC_copy (tree, heap, *result_chain);
3980 /* Function vect_transform_strided_load.
3982 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3983 to perform their permutation and ascribe the result vectorized statements to
3984 the scalar statements.
3988 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3989 gimple_stmt_iterator *gsi)
3991 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3992 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3993 gimple next_stmt, new_stmt;
3994 VEC(tree,heap) *result_chain = NULL;
3995 unsigned int i, gap_count;
3998 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3999 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4000 vectors, that are ready for vector computation. */
4001 result_chain = VEC_alloc (tree, heap, size);
4003 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
4006 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4007 Since we scan the chain starting from it's first node, their order
4008 corresponds the order of data-refs in RESULT_CHAIN. */
4009 next_stmt = first_stmt;
4011 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4016 /* Skip the gaps. Loads created for the gaps will be removed by dead
4017 code elimination pass later. No need to check for the first stmt in
4018 the group, since it always exists.
4019 DR_GROUP_GAP is the number of steps in elements from the previous
4020 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4021 correspond to the gaps. */
4022 if (next_stmt != first_stmt
4023 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4031 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4032 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4033 copies, and we put the new vector statement in the first available
4035 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4036 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4039 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4042 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4044 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4047 prev_stmt = rel_stmt;
4049 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4052 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4057 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4059 /* If NEXT_STMT accesses the same DR as the previous statement,
4060 put the same TMP_DATA_REF as its vectorized statement; otherwise
4061 get the next data-ref from RESULT_CHAIN. */
4062 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4067 VEC_free (tree, heap, result_chain);
4071 /* Function vect_force_dr_alignment_p.
4073 Returns whether the alignment of a DECL can be forced to be aligned
4074 on ALIGNMENT bit boundary. */
4077 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4079 if (TREE_CODE (decl) != VAR_DECL)
4082 if (DECL_EXTERNAL (decl))
4085 if (TREE_ASM_WRITTEN (decl))
4088 if (TREE_STATIC (decl))
4089 return (alignment <= MAX_OFILE_ALIGNMENT);
4091 return (alignment <= MAX_STACK_ALIGNMENT);
4095 /* Return whether the data reference DR is supported with respect to its
4097 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4098 it is aligned, i.e., check if it is possible to vectorize it with different
4101 enum dr_alignment_support
4102 vect_supportable_dr_alignment (struct data_reference *dr,
4103 bool check_aligned_accesses)
4105 gimple stmt = DR_STMT (dr);
4106 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4107 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4108 enum machine_mode mode = TYPE_MODE (vectype);
4109 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4110 struct loop *vect_loop = NULL;
4111 bool nested_in_vect_loop = false;
4113 if (aligned_access_p (dr) && !check_aligned_accesses)
4118 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4119 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4122 /* Possibly unaligned access. */
4124 /* We can choose between using the implicit realignment scheme (generating
4125 a misaligned_move stmt) and the explicit realignment scheme (generating
4126 aligned loads with a REALIGN_LOAD). There are two variants to the
4127 explicit realignment scheme: optimized, and unoptimized.
4128 We can optimize the realignment only if the step between consecutive
4129 vector loads is equal to the vector size. Since the vector memory
4130 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4131 is guaranteed that the misalignment amount remains the same throughout the
4132 execution of the vectorized loop. Therefore, we can create the
4133 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4134 at the loop preheader.
4136 However, in the case of outer-loop vectorization, when vectorizing a
4137 memory access in the inner-loop nested within the LOOP that is now being
4138 vectorized, while it is guaranteed that the misalignment of the
4139 vectorized memory access will remain the same in different outer-loop
4140 iterations, it is *not* guaranteed that is will remain the same throughout
4141 the execution of the inner-loop. This is because the inner-loop advances
4142 with the original scalar step (and not in steps of VS). If the inner-loop
4143 step happens to be a multiple of VS, then the misalignment remains fixed
4144 and we can use the optimized realignment scheme. For example:
4150 When vectorizing the i-loop in the above example, the step between
4151 consecutive vector loads is 1, and so the misalignment does not remain
4152 fixed across the execution of the inner-loop, and the realignment cannot
4153 be optimized (as illustrated in the following pseudo vectorized loop):
4155 for (i=0; i<N; i+=4)
4156 for (j=0; j<M; j++){
4157 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4158 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4159 // (assuming that we start from an aligned address).
4162 We therefore have to use the unoptimized realignment scheme:
4164 for (i=0; i<N; i+=4)
4165 for (j=k; j<M; j+=4)
4166 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4167 // that the misalignment of the initial address is
4170 The loop can then be vectorized as follows:
4172 for (k=0; k<4; k++){
4173 rt = get_realignment_token (&vp[k]);
4174 for (i=0; i<N; i+=4){
4176 for (j=k; j<M; j+=4){
4178 va = REALIGN_LOAD <v1,v2,rt>;
4185 if (DR_IS_READ (dr))
4187 bool is_packed = false;
4188 tree type = (TREE_TYPE (DR_REF (dr)));
4190 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4191 && (!targetm.vectorize.builtin_mask_for_load
4192 || targetm.vectorize.builtin_mask_for_load ()))
4194 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4195 if ((nested_in_vect_loop
4196 && (TREE_INT_CST_LOW (DR_STEP (dr))
4197 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4199 return dr_explicit_realign;
4201 return dr_explicit_realign_optimized;
4203 if (!known_alignment_for_access_p (dr))
4205 tree ba = DR_BASE_OBJECT (dr);
4208 is_packed = contains_packed_reference (ba);
4211 if (targetm.vectorize.
4212 support_vector_misalignment (mode, type,
4213 DR_MISALIGNMENT (dr), is_packed))
4214 /* Can't software pipeline the loads, but can at least do them. */
4215 return dr_unaligned_supported;
4219 bool is_packed = false;
4220 tree type = (TREE_TYPE (DR_REF (dr)));
4222 if (!known_alignment_for_access_p (dr))
4224 tree ba = DR_BASE_OBJECT (dr);
4227 is_packed = contains_packed_reference (ba);
4230 if (targetm.vectorize.
4231 support_vector_misalignment (mode, type,
4232 DR_MISALIGNMENT (dr), is_packed))
4233 return dr_unaligned_supported;
4237 return dr_unaligned_unsupported;