/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we
   have to visit the smallest datatype in this function, because that
   determines the VF.  If the smallest datatype in the loop is present only
   as the rhs of a promotion operation, we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away
   by invariant motion.  However, we cannot rely on invariant motion to
   always take invariants out of the loop, and so in the case of promotion
   we also have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
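
/* Illustrative example (not part of the original source): for a promotion
   statement 'int_x = (int) short_y' on a target with 2-byte shorts and
   4-byte ints, lhs is 4 and rhs is 2, so the function stores 4 and 2 in
   *LHS_SIZE_UNIT and *RHS_SIZE_UNIT and returns the short type; the
   vectorization factor is then computed from the 2-byte element.  */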
/* Find the place of the data-ref in STMT in the interleaving chain that
   starts from FIRST_STMT.  Return -1 if the data-ref is not a part of the
   chain.  */

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller
   than DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT(DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;
  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                            old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously
             marked as the first stmt of the interleaving chain of DRA.
             Therefore, we update FIRST_STMT and put DRB in the head of
             the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new
             FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
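
/* Illustrative walk-through (not from the original source): given chain A
   = {a[2], a[6]} and chain B = {a[0], a[4]}, case 4 applies and B's first
   init (0) is the smaller one, so the nodes of A are merged into B in init
   order, yielding {a[0], a[2], a[4], a[6]}, and every node's
   DR_GROUP_FIRST_DR is redirected to the a[0] statement.  */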
/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share same bases and offsets, we can compare their initial
   constant offsets to decide whether they differ or not.  In case of a read-
   write dependence we check that the load is before the store to ensure that
   vectorization will not change the order of the accesses.  */

static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the
     store.  When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved
     in case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
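
/* Illustrative example (not from the original source): for 'x = a[4];
   a[4] = y;' in one basic block, the two refs have equal bases, offsets
   and inits, so this is a read-write dependence; the load statement is
   the earlier one, so the access order survives vectorization and the
   pair is not treated as a blocking dependence.  */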
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are,
   insert DRA and DRB in an interleaving chain.  */

static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !dr_equal_offsets_p (dra, drb)
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference
        between data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}
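
/* Worked example (illustrative, not from the original source): with 4-byte
   elements, DR_STEP 16 for both refs, init_a = 8 and init_b = 0, we get
   diff_mod_size = 8 % 4 == 0 and 8 <= 16, so the refs interleave within
   one iteration's stride and DRB (the lower init) is accessed first.  With
   init_a = 24 the difference would exceed the step and the refs would not
   be treated as interleaved.  */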
/* Check if data references pointed by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}

/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO's list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return FALSE if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump,
                 "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf,
                                  bool *data_dependence_in_bb)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                       "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block an unknown dependence can still mean
         strided access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* We deal with read-write dependencies in basic blocks later (by
         verifying that all the loads in the basic block are before all the
         stores).  */
      *data_dependence_in_bb = true;
      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is not applicable, hence, in case of a known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block
         vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }
  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* In this case the dependence distance does not create a
             dependence, as far as vectorization is concerned.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
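
/* Worked example (illustrative, not from the original source): with a
   distance vector entry of 2 and an incoming *max_vf of 8, abs (dist) is
   >= 2 and < 8, so *max_vf drops to 2; a later distance of 4 then
   satisfies abs (dist) >= *max_vf and is harmless for any VF <= 2, so it
   is skipped rather than reported as a blocking dependence.  */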
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   bb_vec_info bb_vinfo, int *max_vf,
                                   bool *data_dependence_in_bb)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = NULL;
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (loop_vinfo)
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  else
    ddrs = BB_VINFO_DDRS (bb_vinfo);

  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
                                          data_dependence_in_bb))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is a
     multiple of the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                    TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
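
/* Worked example (illustrative, not from the original source): for a V4SI
   vectype the alignment is 16 bytes, so an access with a 16-byte-aligned
   base and DR_INIT of 12 gets DR_MISALIGNMENT 12.  For a backward DR over
   4-byte elements, 3 * -4 is first added to the init before the modulo,
   accounting for the vector load starting 3 elements below the DR's
   address.  */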
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          /* Mark unsupported statement as unvectorizable.  */
          STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
        else
          return false;
      }

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
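
/* Worked example (illustrative, not from the original source): peeling
   npeel = 2 iterations for a forward DR over 4-byte elements with
   DR_MISALIGNMENT 8 and a 16-byte vector gives misal = (8 + 2 * 4) & 15
   = 0, i.e. the access becomes aligned in the vector loop; with
   DR_MISALIGNMENT 4 it would become (4 + 8) & 15 = 12 instead.  */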
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the number
         of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump,
                     "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
        is_packed = true;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
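
/* Worked example (illustrative, not from the original source): for a
   4-element vector and an interleaving group of size 2, a misalignment of
   1 element would need a prolog of 4 - 1 = 3 iterations; 3 % 2 != 0, so
   alignment is not reachable.  A misalignment of 2 elements gives a
   2-iteration prolog, which covers the group evenly, so peeling is
   allowed.  */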
/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  if (!supportable_dr_alignment)
    *inside_cost = VECT_MAX_COST;
  else
    {
      if (DR_IS_READ (dr))
        vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
      else
        vect_get_store_cost (dr, ncopies, inside_cost);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
             "outside_cost = %d.", *inside_cost, *outside_cost);
}
/* Hash function for the peeling hash table.  */

static hashval_t
vect_peeling_hash (const void *elem)
{
  const struct _vect_peel_info *peel_info;

  peel_info = (const struct _vect_peel_info *) elem;
  return (hashval_t) peel_info->npeel;
}

/* Equality function for the peeling hash table.  */

static int
vect_peeling_hash_eq (const void *elem1, const void *elem2)
{
  const struct _vect_peel_info *a, *b;

  a = (const struct _vect_peel_info *) elem1;
  b = (const struct _vect_peel_info *) elem2;
  return (a->npeel == b->npeel);
}

/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  void **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                                     &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
                                 INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}
/* Traverse the peeling hash table to find the peeling option that aligns
   the maximum number of data accesses.  */

static int
vect_peeling_hash_get_most_frequent (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info max = (vect_peel_extended_info) data;

  if (elem->count > max->peel_info.count)
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}

/* Traverse the peeling hash table and calculate the cost of each peeling
   option.  Find the one with the lowest cost.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
                         vect_get_single_scalar_iteraion_cost (loop_vinfo));

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }

  return 1;
}
/* Choose the best peeling option by traversing the peeling hash table and
   either choosing the option with the lowest cost (if the cost model is
   enabled) or the option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;

  if (flag_vect_cost_model)
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_lowest_cost, &res);
    }
  else
    {
      res.peel_info.count = 0;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_most_frequent, &res);
    }

  *npeel = res.peel_info.npeel;
  return res.peel_info.dr;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the
     transformation (whether to generate regular loads/stores, or with
     special handling for misalignment).  */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy
        vect_supportable_dr_alignment then see if loop versioning checks
        can be used to make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     C) If neither peeling nor versioning were successful then return false
        if any data reference does not satisfy
        vect_supportable_dr_alignment.

     D) Return true (all data references satisfy
        vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together)
     is not being done at this time.  */
  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to
     peel.  Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
                          htab_create (1, vect_peeling_hash,
                                       vect_peeling_hash_eq, free);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                    TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis))
                          & (nelements - 1);

              /* For multiple types, it is possible that the bigger type
                 access will have more than one peeling option.  E.g., a
                 loop with two types: one of size (vector size / 4), and
                 the other one of size (vector size / 8).  The
                 vectorization factor will be 8.  If both accesses are
                 misaligned by 3, the first one needs one scalar iteration
                 to be aligned, and the second one needs 5.  But the first
                 one will be aligned also by peeling 5 scalar iterations,
                 and in that case both accesses will be aligned.  Hence,
                 except for the immediate peeling amount, we also want to
                 try to add full vector size, while we don't exceed the
                 vectorization factor.
                 We do this automatically for the cost model, since we
                 calculate the cost for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }
              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know all the misalignment values, we prefer
                 peeling for the data-ref that has the maximum number of
                 data-refs with the same alignment, unless the target
                 prefers to align stores over loads.  */
              if (all_misalignments_unknown)
                {
                  if (same_align_drs_max < VEC_length (dr_p,
                                  STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
                      || !dr0)
                    {
                      same_align_drs_max = VEC_length (dr_p,
                                  STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
                      dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in
                 the loop, we choose peeling amount according to the known
                 misalignments.  */
              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector alignment may not be reachable");
              break;
            }
        }
    }

  vect_versioning_for_alias_required
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;
  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;

          vect_get_data_access_cost (dr0, &load_inside_cost,
                                     &load_outside_cost);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost);

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (first_store))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (dr0))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments,
         use peeling only if it may help to align other accesses in the
         loop.  */
      if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                       (vinfo_for_stmt (DR_STMT (dr0))))
          && vect_supportable_dr_alignment (dr0, false)
             != dr_unaligned_supported)
        do_peeling = false;
    }

  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not
         supported unless aligned.  So we try to choose the best possible
         peeling.  */

      /* We should get here only if there are drs with known misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
      if (!dr0 || !npeel)
        do_peeling = false;
    }

  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use
                 in updating DR_MISALIGNMENT values.  The peeling factor is
                 the vectorization factor minus the misalignment as an
                 element count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = ((negative ? mis - nelements : nelements - mis)
                       & (nelements - 1));
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of
             iterations by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }
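
      /* Worked example (illustrative, not from the original source): with
         4-byte elements in a 4-element vector, DR_MISALIGNMENT (dr0) = 8
         bytes is mis = 2 elements, so the prolog needs nelements - mis = 2
         scalar iterations; if dr0 belongs to an interleaving group of size
         2, npeel is further divided to 1, since every iteration accesses
         the whole group.  */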
      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            return stat;
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then
             set DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of
             DR_i and dr0 are known at compile time then increment
             DR_MISALIGNMENT (DR_i) by the peeling factor times the element
             size of DR_i (MOD the vectorization factor times the size).
             Otherwise, the misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }
  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an
        unknown misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (gimple,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (gimple, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             stmt);
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }
  if (do_versioning)
    {
      VEC(gimple,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump,
                     "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being
     done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
  return stat;
}
1893 /* Function vect_find_same_alignment_drs.
1895 Update group and alignment relations according to the chosen
1896 vectorization factor. */
1899 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1900 loop_vec_info loop_vinfo)
1903 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1904 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1905 struct data_reference *dra = DDR_A (ddr);
1906 struct data_reference *drb = DDR_B (ddr);
1907 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1908 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1909 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1910 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1911 lambda_vector dist_v;
1912 unsigned int loop_depth;
1914 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1920 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1923 /* Loop-based vectorization and known data dependence. */
1924 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1927 /* Data-dependence analysis reports a distance vector of zero
1928 for data-references that overlap only in the first iteration
1929 but have different sign step (see PR45764).
1930 So as a sanity check require equal DR_STEP. */
1931 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1934 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1935 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1937 int dist = dist_v[loop_depth];
1939 if (vect_print_dump_info (REPORT_DR_DETAILS))
1940 fprintf (vect_dump, "dependence distance = %d.", dist);
1942 /* Same loop iteration. */
1943 if (dist == 0
1944 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1946 /* Two references with distance zero have the same alignment. */
1947 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1948 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1949 if (vect_print_dump_info (REPORT_ALIGNMENT))
1950 fprintf (vect_dump, "accesses have the same alignment.");
1951 if (vect_print_dump_info (REPORT_DR_DETAILS))
1953 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1954 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1955 fprintf (vect_dump, " and ");
1956 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
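/* Illustrative example: with VF == 4 and 4-byte ints, the references
   a[i] and a[i+4] have dependence distance 4, and 4 % VF == 0, so the
   second access repeats the first one 16 bytes (one vector) later;
   whatever misalignment holds for one holds for the other.  */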
1963 /* Function vect_analyze_data_refs_alignment
1965 Analyze the alignment of the data-references in the loop.
1966 Return FALSE if a data reference is found that cannot be vectorized. */
1969 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1970 bb_vec_info bb_vinfo)
1972 if (vect_print_dump_info (REPORT_DETAILS))
1973 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1975 /* Mark groups of data references with same alignment using
1976 data dependence information. */
1979 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1980 struct data_dependence_relation *ddr;
1983 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
1984 vect_find_same_alignment_drs (ddr, loop_vinfo);
1987 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1989 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1991 "not vectorized: can't calculate alignment for data ref.");
1999 /* Analyze groups of strided accesses: check that DR belongs to a group of
2000 strided accesses of legal size, step, etc. Detect gaps, single element
2001 interleaving, and other special cases. Set strided access info.
2002 Collect groups of strided stores for further use in SLP analysis. */
2005 vect_analyze_group_access (struct data_reference *dr)
2007 tree step = DR_STEP (dr);
2008 tree scalar_type = TREE_TYPE (DR_REF (dr));
2009 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2010 gimple stmt = DR_STMT (dr);
2011 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2012 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2013 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2014 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2015 HOST_WIDE_INT stride;
2016 bool slp_impossible = false;
2018 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2019 interleaving group (including gaps). */
2020 stride = dr_step / type_size;
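/* Illustrative examples: for short accesses a[2*i] and a[2*i+1] the
   step is 4 bytes and TYPE_SIZE is 2, so STRIDE == 2 (a group of two
   elements).  A lone 4-byte load a[4*i] has step 16 and STRIDE == 4:
   single element interleaving with gaps, detected below.  */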
2022 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2023 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2025 /* Check whether this DR is part of an interleaving chain, and is a single
2026 element of the group that is accessed in the loop. */
2028 /* Gaps are supported only for loads. STEP must be a multiple of the type
2029 size. The size of the group must be a power of 2. */
2031 && (dr_step % type_size) == 0
2033 && exact_log2 (stride) != -1)
2035 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2036 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2037 if (vect_print_dump_info (REPORT_DR_DETAILS))
2039 fprintf (vect_dump, "Detected single element interleaving ");
2040 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2041 fprintf (vect_dump, " step ");
2042 print_generic_expr (vect_dump, step, TDF_SLIM);
2047 if (vect_print_dump_info (REPORT_DETAILS))
2049 fprintf (vect_dump, "not consecutive access ");
2050 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2055 /* Mark the statement as unvectorizable. */
2056 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2063 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2065 /* First stmt in the interleaving chain. Check the chain. */
2066 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2067 struct data_reference *data_ref = dr;
2068 unsigned int count = 1;
2070 tree prev_init = DR_INIT (data_ref);
2072 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2076 /* Skip same data-refs. In case that two or more stmts share
2077 data-ref (supported only for loads), we vectorize only the first
2078 stmt, and the rest get their vectorized loads from the first
2079 one. */
2080 if (!tree_int_cst_compare (DR_INIT (data_ref),
2081 DR_INIT (STMT_VINFO_DATA_REF (
2082 vinfo_for_stmt (next)))))
2084 if (DR_IS_WRITE (data_ref))
2086 if (vect_print_dump_info (REPORT_DETAILS))
2087 fprintf (vect_dump, "Two store stmts share the same dr.");
2091 /* Check that there are no load-store dependencies for these loads,
2092 to prevent a load-store-load sequence to the same location. */
2093 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2094 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2096 if (vect_print_dump_info (REPORT_DETAILS))
2098 "READ_WRITE dependence in interleaving.");
2102 /* For loads, use the same data-ref load. */
2103 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2106 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2111 /* Check that all the accesses have the same STEP. */
2112 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2113 if (tree_int_cst_compare (step, next_step))
2115 if (vect_print_dump_info (REPORT_DETAILS))
2116 fprintf (vect_dump, "not consecutive access in interleaving");
2120 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2121 /* Check that the distance between two accesses is equal to the type
2122 size. Otherwise, we have gaps. */
2123 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2124 - TREE_INT_CST_LOW (prev_init)) / type_size;
2127 /* FORNOW: SLP of accesses with gaps is not supported. */
2128 slp_impossible = true;
2129 if (DR_IS_WRITE (data_ref))
2131 if (vect_print_dump_info (REPORT_DETAILS))
2132 fprintf (vect_dump, "interleaved store with gaps");
2139 /* Store the gap from the previous member of the group. If there is no
2140 gap in the access, DR_GROUP_GAP is always 1. */
2141 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2143 prev_init = DR_INIT (data_ref);
2144 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2145 /* Count the number of data-refs in the chain. */
2146 count++;
2149 /* COUNT is the number of accesses found, we multiply it by the size of
2150 the type to get COUNT_IN_BYTES. */
2151 count_in_bytes = type_size * count;
2153 /* Check that the size of the interleaving (including gaps) is not
2154 greater than STEP. */
2155 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2157 if (vect_print_dump_info (REPORT_DETAILS))
2159 fprintf (vect_dump, "interleaving size is greater than step for ");
2160 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2165 /* Check that the size of the interleaving is equal to STEP for stores,
2166 i.e., that there are no gaps. */
2167 if (dr_step && dr_step != count_in_bytes)
2169 if (DR_IS_READ (dr))
2171 slp_impossible = true;
2172 /* There is a gap after the last load in the group. This gap is a
2173 difference between the stride and the number of elements. When
2174 there is no gap, this difference should be 0. */
2175 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
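/* Illustrative example: 4-byte loads a[4*i] and a[4*i+1] give
   DR_STEP == 16 and STRIDE == 4 but COUNT == 2, so COUNT_IN_BYTES == 8
   differs from DR_STEP and the gap stored here is 4 - 2 == 2 unused
   elements after the last load of each group.  */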
2179 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "interleaved store with gaps");
2185 /* Check that STEP is a multiple of type size. */
2186 if (dr_step && (dr_step % type_size) != 0)
2188 if (vect_print_dump_info (REPORT_DETAILS))
2190 fprintf (vect_dump, "step is not a multiple of type size: step ");
2191 print_generic_expr (vect_dump, step, TDF_SLIM);
2192 fprintf (vect_dump, " size ");
2193 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2194 TDF_SLIM);
2202 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2203 if (vect_print_dump_info (REPORT_DETAILS))
2204 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2206 /* SLP: create an SLP data structure for every interleaving group of
2207 stores for further analysis in vect_analyze_slp. */
2208 if (DR_IS_WRITE (dr) && !slp_impossible)
2211 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2214 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2223 /* Analyze the access pattern of the data-reference DR.
2224 In case of non-consecutive accesses call vect_analyze_group_access() to
2225 analyze groups of strided accesses. */
2228 vect_analyze_data_ref_access (struct data_reference *dr)
2230 tree step = DR_STEP (dr);
2231 tree scalar_type = TREE_TYPE (DR_REF (dr));
2232 gimple stmt = DR_STMT (dr);
2233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2234 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2235 struct loop *loop = NULL;
2236 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2239 loop = LOOP_VINFO_LOOP (loop_vinfo);
2241 if (loop_vinfo && !step)
2243 if (vect_print_dump_info (REPORT_DETAILS))
2244 fprintf (vect_dump, "bad data-ref access in loop");
2248 /* Don't allow invariant accesses in loops. */
2249 if (loop_vinfo && dr_step == 0)
2252 if (loop && nested_in_vect_loop_p (loop, stmt))
2254 /* Interleaved accesses are not yet supported within outer-loop
2255 vectorization for references in the inner-loop. */
2256 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2258 /* For the rest of the analysis we use the outer-loop step. */
2259 step = STMT_VINFO_DR_STEP (stmt_info);
2260 dr_step = TREE_INT_CST_LOW (step);
2264 if (vect_print_dump_info (REPORT_ALIGNMENT))
2265 fprintf (vect_dump, "zero step in outer loop.");
2266 if (DR_IS_READ (dr))
2274 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2275 || (dr_step < 0
2276 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2278 /* Mark that it is not interleaving. */
2279 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
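/* Illustrative examples: int a[i] has step 4 == the type size, a plain
   consecutive access; int a[n-i] has step -4, which the negative-step
   test above also classifies as consecutive (reversed).  */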
2283 if (loop && nested_in_vect_loop_p (loop, stmt))
2285 if (vect_print_dump_info (REPORT_ALIGNMENT))
2286 fprintf (vect_dump, "strided access in outer loop.");
2290 /* Non-consecutive access - check whether it is part of an interleaving group. */
2291 return vect_analyze_group_access (dr);
2295 /* Function vect_analyze_data_ref_accesses.
2297 Analyze the access pattern of all the data references in the loop.
2299 FORNOW: the only access pattern that is considered vectorizable is a
2300 simple step 1 (consecutive) access.
2302 FORNOW: handle only arrays and pointer accesses. */
2305 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2308 VEC (data_reference_p, heap) *datarefs;
2309 struct data_reference *dr;
2311 if (vect_print_dump_info (REPORT_DETAILS))
2312 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2315 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2317 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2319 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2320 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2321 && !vect_analyze_data_ref_access (dr))
2323 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2324 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2328 /* Mark the statement as not vectorizable. */
2329 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2339 /* Function vect_prune_runtime_alias_test_list.
2341 Prune a list of ddrs to be tested at run-time by versioning for alias.
2342 Return FALSE if the resulting list of ddrs is longer than allowed by
2343 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
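/* Illustrative example: if both the pair (p[i], q[i]) and the pair
   (p[i+1], q[i+1]) made it into LOOP_VINFO_MAY_ALIAS_DDRS, the two
   ddrs cover the same address ranges, so a single runtime overlap
   check suffices and the duplicate is removed (vect_vfa_range_equal
   detects the equal ranges).  */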
2346 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2348 VEC (ddr_p, heap) * ddrs =
2349 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2352 if (vect_print_dump_info (REPORT_DETAILS))
2353 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2355 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2360 ddr_i = VEC_index (ddr_p, ddrs, i);
2363 for (j = 0; j < i; j++)
2365 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2367 if (vect_vfa_range_equal (ddr_i, ddr_j))
2369 if (vect_print_dump_info (REPORT_DR_DETAILS))
2371 fprintf (vect_dump, "found equal ranges ");
2372 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2373 fprintf (vect_dump, ", ");
2374 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2375 fprintf (vect_dump, " and ");
2376 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2377 fprintf (vect_dump, ", ");
2378 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2387 VEC_ordered_remove (ddr_p, ddrs, i);
2393 if (VEC_length (ddr_p, ddrs) >
2394 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2396 if (vect_print_dump_info (REPORT_DR_DETAILS))
2399 "disable versioning for alias - max number of generated "
2400 "checks exceeded.");
2403 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2412 /* Function vect_analyze_data_refs.
2414 Find all the data references in the loop or basic block.
2416 The general structure of the analysis of data refs in the vectorizer is as
2418 1- vect_analyze_data_refs(loop/bb): call
2419 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2420 in the loop/bb and their dependences.
2421 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2422 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2423 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2428 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2429 bb_vec_info bb_vinfo,
2432 struct loop *loop = NULL;
2433 basic_block bb = NULL;
2435 VEC (data_reference_p, heap) *datarefs;
2436 struct data_reference *dr;
2440 if (vect_print_dump_info (REPORT_DETAILS))
2441 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2445 loop = LOOP_VINFO_LOOP (loop_vinfo);
2446 res = compute_data_dependences_for_loop
2448 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2449 &LOOP_VINFO_DATAREFS (loop_vinfo),
2450 &LOOP_VINFO_DDRS (loop_vinfo));
2454 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2455 fprintf (vect_dump, "not vectorized: loop contains function calls"
2456 " or data references that cannot be analyzed");
2460 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2464 bb = BB_VINFO_BB (bb_vinfo);
2465 res = compute_data_dependences_for_bb (bb, true,
2466 &BB_VINFO_DATAREFS (bb_vinfo),
2467 &BB_VINFO_DDRS (bb_vinfo));
2470 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2471 fprintf (vect_dump, "not vectorized: basic block contains function"
2472 " calls or data references that cannot be analyzed");
2476 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2479 /* Go through the data-refs, check that the analysis succeeded. Update
2480 pointer from stmt_vec_info struct to DR and vectype. */
2482 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2485 stmt_vec_info stmt_info;
2486 tree base, offset, init;
2489 if (!dr || !DR_REF (dr))
2491 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2492 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2496 stmt = DR_STMT (dr);
2497 stmt_info = vinfo_for_stmt (stmt);
2499 /* Check that analysis of the data-ref succeeded. */
2500 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2503 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2505 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2506 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2511 /* Mark the statement as not vectorizable. */
2512 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2519 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2521 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2522 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2523 "constant");
2526 /* Mark the statement as not vectorizable. */
2527 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2534 base = unshare_expr (DR_BASE_ADDRESS (dr));
2535 offset = unshare_expr (DR_OFFSET (dr));
2536 init = unshare_expr (DR_INIT (dr));
2538 if (stmt_can_throw_internal (stmt))
2540 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2542 fprintf (vect_dump, "not vectorized: statement can throw an "
2543 "exception ");
2544 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2549 /* Update DR field in stmt_vec_info struct. */
2551 /* If the dataref is in an inner-loop of the loop that is considered for
2552 vectorization, we also want to analyze the access relative to
2553 the outer-loop (DR contains information only relative to the
2554 inner-most enclosing loop). We do that by building a reference to the
2555 first location accessed by the inner-loop, and analyze it relative to
2557 if (loop && nested_in_vect_loop_p (loop, stmt))
2559 tree outer_step, outer_base, outer_init;
2560 HOST_WIDE_INT pbitsize, pbitpos;
2562 enum machine_mode pmode;
2563 int punsignedp, pvolatilep;
2564 affine_iv base_iv, offset_iv;
2567 /* Build a reference to the first location accessed by the
2568 inner-loop: *(BASE+INIT). (The first location is actually
2569 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2570 tree inner_base = build_fold_indirect_ref
2571 (fold_build2 (POINTER_PLUS_EXPR,
2572 TREE_TYPE (base), base,
2573 fold_convert (sizetype, init)));
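/* Illustrative example: for a load in[i+j] in a j-loop nested in the
   vectorized i-loop ('in' pointing to shorts), the DR describes the
   access relative to the j-loop (step 2B), and INNER_BASE becomes
   *(in + i*2B) - the first location touched by the inner loop - which
   is then analyzed as an induction variable of the outer i-loop.  */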
2575 if (vect_print_dump_info (REPORT_DETAILS))
2577 fprintf (vect_dump, "analyze in outer-loop: ");
2578 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2581 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2582 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2583 gcc_assert (outer_base != NULL_TREE);
2585 if (pbitpos % BITS_PER_UNIT != 0)
2587 if (vect_print_dump_info (REPORT_DETAILS))
2588 fprintf (vect_dump, "failed: bit offset alignment.\n");
2592 outer_base = build_fold_addr_expr (outer_base);
2593 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2596 if (vect_print_dump_info (REPORT_DETAILS))
2597 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2604 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2612 offset_iv.base = ssize_int (0);
2613 offset_iv.step = ssize_int (0);
2615 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2618 if (vect_print_dump_info (REPORT_DETAILS))
2619 fprintf (vect_dump, "evolution of offset is not affine.\n");
2623 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2624 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2625 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2626 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2627 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2629 outer_step = size_binop (PLUS_EXPR,
2630 fold_convert (ssizetype, base_iv.step),
2631 fold_convert (ssizetype, offset_iv.step));
2633 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2634 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2635 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2636 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2637 STMT_VINFO_DR_OFFSET (stmt_info) =
2638 fold_convert (ssizetype, offset_iv.base);
2639 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2640 size_int (highest_pow2_factor (offset_iv.base));
2642 if (vect_print_dump_info (REPORT_DETAILS))
2644 fprintf (vect_dump, "\touter base_address: ");
2645 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2646 fprintf (vect_dump, "\n\touter offset from base address: ");
2647 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2648 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2649 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2650 fprintf (vect_dump, "\n\touter step: ");
2651 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2652 fprintf (vect_dump, "\n\touter aligned to: ");
2653 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2657 if (STMT_VINFO_DATA_REF (stmt_info))
2659 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2662 "not vectorized: more than one data ref in stmt: ");
2663 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2668 STMT_VINFO_DATA_REF (stmt_info) = dr;
2670 /* Set vectype for STMT. */
2671 scalar_type = TREE_TYPE (DR_REF (dr));
2672 STMT_VINFO_VECTYPE (stmt_info) =
2673 get_vectype_for_scalar_type (scalar_type);
2674 if (!STMT_VINFO_VECTYPE (stmt_info))
2676 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2679 "not vectorized: no vectype for stmt: ");
2680 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2681 fprintf (vect_dump, " scalar_type: ");
2682 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2687 /* Mark the statement as not vectorizable. */
2688 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2695 /* Adjust the minimal vectorization factor according to the
2697 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2706 /* Function vect_get_new_vect_var.
2708 Returns a name for a new variable. The current naming scheme prepends
2709 the prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2710 NAME when NAME is provided; otherwise the prefix alone is used. */
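/* E.g., for NAME "in" this yields a variable named "vect_in" when
   VAR_KIND is vect_simple_var and "vect_pin" when it is
   vect_pointer_var.  */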
2714 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2721 case vect_simple_var:
2724 case vect_scalar_var:
2727 case vect_pointer_var:
2736 char* tmp = concat (prefix, name, NULL);
2737 new_vect_var = create_tmp_var (type, tmp);
2741 new_vect_var = create_tmp_var (type, prefix);
2743 /* Mark vector typed variable as a gimple register variable. */
2744 if (TREE_CODE (type) == VECTOR_TYPE)
2745 DECL_GIMPLE_REG_P (new_vect_var) = true;
2747 return new_vect_var;
2751 /* Function vect_create_addr_base_for_vector_ref.
2753 Create an expression that computes the address of the first memory location
2754 that will be accessed for a data reference.
2757 STMT: The statement containing the data reference.
2758 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2759 OFFSET: Optional. If supplied, it is added to the initial address.
2760 LOOP: Specifies the loop nest relative to which the address is computed.
2761 For example, when the dataref is in an inner-loop nested in an
2762 outer-loop that is now being vectorized, LOOP can be either the
2763 outer-loop, or the inner-loop. The first memory location accessed
2764 by the following dataref ('in' points to short):
2771 if LOOP=i_loop: &in (relative to i_loop)
2772 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2775 1. Return an SSA_NAME whose value is the address of the memory location of
2776 the first vector of the data reference.
2777 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2778 these statement(s) which define the returned SSA_NAME.
2780 FORNOW: We are only handling array accesses with step 1. */
2783 vect_create_addr_base_for_vector_ref (gimple stmt,
2784 gimple_seq *new_stmt_list,
2788 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2789 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2790 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2792 tree data_ref_base_var;
2794 tree addr_base, addr_expr;
2796 gimple_seq seq = NULL;
2797 tree base_offset = unshare_expr (DR_OFFSET (dr));
2798 tree init = unshare_expr (DR_INIT (dr));
2800 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2801 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2804 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2806 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2808 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2810 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2811 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2812 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2816 base_name = build_fold_indirect_ref (data_ref_base);
2819 base_offset = ssize_int (0);
2820 init = ssize_int (0);
2821 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2824 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2825 add_referenced_var (data_ref_base_var);
2826 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2828 gimple_seq_add_seq (new_stmt_list, seq);
2830 /* Create base_offset */
2831 base_offset = size_binop (PLUS_EXPR,
2832 fold_convert (sizetype, base_offset),
2833 fold_convert (sizetype, init));
2834 dest = create_tmp_var (sizetype, "base_off");
2835 add_referenced_var (dest);
2836 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2837 gimple_seq_add_seq (new_stmt_list, seq);
2841 tree tmp = create_tmp_var (sizetype, "offset");
2843 add_referenced_var (tmp);
2844 offset = fold_build2 (MULT_EXPR, sizetype,
2845 fold_convert (sizetype, offset), step);
2846 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2847 base_offset, offset);
2848 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2849 gimple_seq_add_seq (new_stmt_list, seq);
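/* Worked example (illustrative): for 4-byte ints with DR_OFFSET == 0,
   DR_INIT == 4 (i.e. &a[1]) and OFFSET == 4 vector elements,
   BASE_OFFSET becomes 4 + 4*4 == 20, so the address built below is
   &a + 20, i.e. &a[5].  */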
2852 /* base + base_offset */
2854 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2855 data_ref_base, base_offset);
2858 addr_base = build1 (ADDR_EXPR,
2859 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2860 unshare_expr (DR_REF (dr)));
2863 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2864 base = get_base_address (DR_REF (dr));
2866 && TREE_CODE (base) == MEM_REF)
2868 = build_qualified_type (vect_ptr_type,
2869 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2871 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2872 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2873 get_name (base_name));
2874 add_referenced_var (addr_expr);
2875 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2876 gimple_seq_add_seq (new_stmt_list, seq);
2878 if (DR_PTR_INFO (dr)
2879 && TREE_CODE (vec_stmt) == SSA_NAME)
2881 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2884 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2885 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2889 if (vect_print_dump_info (REPORT_DETAILS))
2891 fprintf (vect_dump, "created ");
2892 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2899 /* Function vect_create_data_ref_ptr.
2901 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2902 location accessed in the loop by STMT, along with the def-use update
2903 chain to appropriately advance the pointer through the loop iterations.
2904 Also set aliasing information for the pointer. This pointer is used by
2905 the callers to this function to create a memory reference expression for
2906 vector load/store access.
2909 1. STMT: a stmt that references memory. Expected to be of the form
2910 GIMPLE_ASSIGN <name, data-ref> or
2911 GIMPLE_ASSIGN <data-ref, name>.
2912 2. AGGR_TYPE: the type of the reference, which should be either a vector
2913 or an array.
2914 3. AT_LOOP: the loop where the vector memref is to be created.
2915 4. OFFSET (optional): an offset to be added to the initial address accessed
2916 by the data-ref in STMT.
2917 5. BSI: location where the new stmts are to be placed if there is no loop
2918 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2919 pointing to the initial address.
2922 1. Declare a new ptr to vector_type, and have it point to the base of the
2923 data reference (the initial address accessed by the data reference).
2924 For example, for vector of type V8HI, the following code is generated:
2927 ap = (v8hi *)initial_address;
2929 if OFFSET is not supplied:
2930 initial_address = &a[init];
2931 if OFFSET is supplied:
2932 initial_address = &a[init + OFFSET];
2934 Return the initial_address in INITIAL_ADDRESS.
2936 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2937 update the pointer in each iteration of the loop.
2939 Return the increment stmt that updates the pointer in PTR_INCR.
2941 3. Set INV_P to true if the access pattern of the data reference in the
2942 vectorized loop is invariant. Set it to false otherwise.
2944 4. Return the pointer. */
2947 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
2948 tree offset, tree *initial_address,
2949 gimple_stmt_iterator *gsi, gimple *ptr_incr,
2950 bool only_init, bool *inv_p)
2953 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2954 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2955 struct loop *loop = NULL;
2956 bool nested_in_vect_loop = false;
2957 struct loop *containing_loop = NULL;
2962 gimple_seq new_stmt_list = NULL;
2966 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2968 gimple_stmt_iterator incr_gsi;
2971 tree indx_before_incr, indx_after_incr;
2974 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2977 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
2978 || TREE_CODE (aggr_type) == VECTOR_TYPE);
2982 loop = LOOP_VINFO_LOOP (loop_vinfo);
2983 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2984 containing_loop = (gimple_bb (stmt))->loop_father;
2985 pe = loop_preheader_edge (loop);
2989 gcc_assert (bb_vinfo);
2994 /* Check the step (evolution) of the load in LOOP, and record
2995 whether it's invariant. */
2996 if (nested_in_vect_loop)
2997 step = STMT_VINFO_DR_STEP (stmt_info);
2999 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3001 if (tree_int_cst_compare (step, size_zero_node) == 0)
3002 *inv_p = true;
3003 else
3004 *inv_p = false;
3005 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3007 /* Create an expression for the first address accessed by this load
3009 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3011 if (vect_print_dump_info (REPORT_DETAILS))
3013 tree data_ref_base = base_name;
3014 fprintf (vect_dump, "create %s-pointer variable to type: ",
3015 tree_code_name[(int) TREE_CODE (aggr_type)]);
3016 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3017 if (TREE_CODE (data_ref_base) == VAR_DECL
3018 || TREE_CODE (data_ref_base) == ARRAY_REF)
3019 fprintf (vect_dump, " vectorizing an array ref: ");
3020 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3021 fprintf (vect_dump, " vectorizing a record based array ref: ");
3022 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3023 fprintf (vect_dump, " vectorizing a pointer ref: ");
3024 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3027 /* (1) Create the new aggregate-pointer variable. */
3028 aggr_ptr_type = build_pointer_type (aggr_type);
3029 base = get_base_address (DR_REF (dr));
3031 && TREE_CODE (base) == MEM_REF)
3033 = build_qualified_type (aggr_ptr_type,
3034 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3035 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3036 get_name (base_name));
3038 /* Vector and array types inherit the alias set of their component
3039 type by default so we need to use a ref-all pointer if the data
3040 reference does not conflict with the created aggregated data
3041 reference because it is not addressable. */
3042 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3043 get_alias_set (DR_REF (dr))))
3046 = build_pointer_type_for_mode (aggr_type,
3047 TYPE_MODE (aggr_ptr_type), true);
3048 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3049 get_name (base_name));
3052 /* Likewise for any of the data references in the stmt group. */
3053 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3055 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3058 tree lhs = gimple_assign_lhs (orig_stmt);
3059 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3060 get_alias_set (lhs)))
3063 = build_pointer_type_for_mode (aggr_type,
3064 TYPE_MODE (aggr_ptr_type), true);
3066 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3067 get_name (base_name));
3071 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3076 add_referenced_var (aggr_ptr);
3078 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3079 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3080 def-use update cycles for the pointer: one relative to the outer-loop
3081 (LOOP), which is what steps (3) and (4) below do. The other is relative
3082 to the inner-loop (which is the inner-most loop containing the dataref),
3083 and this is done by step (5) below.
3085 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3086 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3087 redundant. Steps (3),(4) create the following:
3090 LOOP: vp1 = phi(vp0,vp2)
3096 If there is an inner-loop nested in loop, then step (5) will also be
3097 applied, and an additional update in the inner-loop will be created:
3100 LOOP: vp1 = phi(vp0,vp2)
3102 inner: vp3 = phi(vp1,vp4)
3103 vp4 = vp3 + inner_step
3109 /* (2) Calculate the initial address of the aggregate-pointer, and set
3110 the aggregate-pointer to point to it before the loop. */
3112 /* Create: &(base[init_val+offset]) in the loop preheader. */
3114 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3120 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3121 gcc_assert (!new_bb);
3124 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3127 *initial_address = new_temp;
3129 /* Create: p = (aggr_type *) initial_base */
3130 if (TREE_CODE (new_temp) != SSA_NAME
3131 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3133 vec_stmt = gimple_build_assign (aggr_ptr,
3134 fold_convert (aggr_ptr_type, new_temp));
3135 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3136 /* Copy the points-to information if it exists. */
3137 if (DR_PTR_INFO (dr))
3138 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3139 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3142 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3143 gcc_assert (!new_bb);
3146 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3149 aggr_ptr_init = new_temp;
3151 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3152 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3153 inner-loop nested in LOOP (during outer-loop vectorization). */
3155 /* No update in loop is required. */
3156 if (only_init && (!loop_vinfo || at_loop == loop))
3157 aptr = aggr_ptr_init;
3160 /* The step of the aggregate pointer is the type size. */
3161 tree step = TYPE_SIZE_UNIT (aggr_type);
3162 /* One exception to the above is when the scalar step of the load in
3163 LOOP is zero. In this case the step here is also zero. */
3165 step = size_zero_node;
3167 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3169 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3171 create_iv (aggr_ptr_init,
3172 fold_convert (aggr_ptr_type, step),
3173 aggr_ptr, loop, &incr_gsi, insert_after,
3174 &indx_before_incr, &indx_after_incr);
3175 incr = gsi_stmt (incr_gsi);
3176 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3178 /* Copy the points-to information if it exists. */
3179 if (DR_PTR_INFO (dr))
3181 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3182 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3187 aptr = indx_before_incr;
3190 if (!nested_in_vect_loop || only_init)
3194 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3195 nested in LOOP, if it exists. */
3197 gcc_assert (nested_in_vect_loop);
3200 standard_iv_increment_position (containing_loop, &incr_gsi,
3202 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3203 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3205 incr = gsi_stmt (incr_gsi);
3206 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3208 /* Copy the points-to information if it exists. */
3209 if (DR_PTR_INFO (dr))
3211 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3212 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3217 return indx_before_incr;
3224 /* Function bump_vector_ptr
3226 Increment a pointer (to a vector type) by vector-size. If requested,
3227 i.e. if PTR-INCR is given, then also connect the new increment stmt
3228 to the existing def-use update-chain of the pointer, by modifying
3229 the PTR_INCR as illustrated below:
3231 The pointer def-use update-chain before this function:
3232 DATAREF_PTR = phi (p_0, p_2)
3234 PTR_INCR: p_2 = DATAREF_PTR + step
3236 The pointer def-use update-chain after this function:
3237 DATAREF_PTR = phi (p_0, p_2)
3239 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3241 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3244 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3246 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3247 the loop. The increment amount across iterations is expected
3249 BSI - location where the new update stmt is to be placed.
3250 STMT - the original scalar memory-access stmt that is being vectorized.
3251 BUMP - optional. The offset by which to bump the pointer. If not given,
3252 the offset is assumed to be vector_size.
3254 Output: Return NEW_DATAREF_PTR as illustrated above.
3259 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3260 gimple stmt, tree bump)
3262 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3263 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3264 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3265 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3266 tree update = TYPE_SIZE_UNIT (vectype);
3269 use_operand_p use_p;
3270 tree new_dataref_ptr;
3275 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3276 dataref_ptr, update);
3277 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3278 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3279 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3281 /* Copy the points-to information if it exists. */
3282 if (DR_PTR_INFO (dr))
3284 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3285 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3286 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3290 return new_dataref_ptr;
3292 /* Update the vector-pointer's cross-iteration increment. */
3293 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3295 tree use = USE_FROM_PTR (use_p);
3297 if (use == dataref_ptr)
3298 SET_USE (use_p, new_dataref_ptr);
3300 gcc_assert (tree_int_cst_compare (use, update) == 0);
3303 return new_dataref_ptr;
3307 /* Function vect_create_destination_var.
3309 Create a new temporary of type VECTYPE. */
3312 vect_create_destination_var (tree scalar_dest, tree vectype)
3315 const char *new_name;
3317 enum vect_var_kind kind;
3319 kind = vectype ? vect_simple_var : vect_scalar_var;
3320 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3322 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3324 new_name = get_name (scalar_dest);
3327 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3328 add_referenced_var (vec_dest);
3333 /* Function vect_strided_store_supported.
3335 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3336 and FALSE otherwise. */
3339 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3341 optab interleave_high_optab, interleave_low_optab;
3342 enum machine_mode mode;
3344 mode = TYPE_MODE (vectype);
3346 /* vect_permute_store_chain requires the group size to be a power of two. */
3347 if (exact_log2 (count) == -1)
3349 if (vect_print_dump_info (REPORT_DETAILS))
3350 fprintf (vect_dump, "the size of the group of strided accesses"
3351 " is not a power of 2");
3355 /* Check that the operation is supported. */
3356 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3357 vectype, optab_default);
3358 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3359 vectype, optab_default);
3360 if (!interleave_high_optab || !interleave_low_optab)
3362 if (vect_print_dump_info (REPORT_DETAILS))
3363 fprintf (vect_dump, "no optab for interleave.");
3367 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3368 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3370 if (vect_print_dump_info (REPORT_DETAILS))
3371 fprintf (vect_dump, "interleave op not supported by target.");
3379 /* Function vect_permute_store_chain.
3381 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3382 a power of 2, generate interleave_high/low stmts to reorder the data
3383 correctly for the stores. Return the final references for stores in
3386 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3387 The input is 4 vectors each containing 8 elements. We assign a number to
3388 each element, the input sequence is:
3390 1st vec: 0 1 2 3 4 5 6 7
3391 2nd vec: 8 9 10 11 12 13 14 15
3392 3rd vec: 16 17 18 19 20 21 22 23
3393 4th vec: 24 25 26 27 28 29 30 31
3395 The output sequence should be:
3397 1st vec: 0 8 16 24 1 9 17 25
3398 2nd vec: 2 10 18 26 3 11 19 27
3399 3rd vec: 4 12 20 28 5 13 21 29
3400 4th vec: 6 14 22 30 7 15 23 31
3402 i.e., we interleave the contents of the four vectors in their order.
3404 We use interleave_high/low instructions to create such output. The input of
3405 each interleave_high/low operation is two vectors:
3406 1st vec: 0 1 2 3
3407 2nd vec: 4 5 6 7
3408 the even elements of the result vector are obtained left-to-right from the
3409 high/low elements of the first vector. The odd elements of the result are
3410 obtained left-to-right from the high/low elements of the second vector.
3411 The output of interleave_high will be: 0 4 1 5
3412 and of interleave_low: 2 6 3 7
3415 The permutation is done in log LENGTH stages. In each stage interleave_high
3416 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3417 where the first argument is taken from the first half of DR_CHAIN and the
3418 second argument from its second half.
3421 I1: interleave_high (1st vec, 3rd vec)
3422 I2: interleave_low (1st vec, 3rd vec)
3423 I3: interleave_high (2nd vec, 4th vec)
3424 I4: interleave_low (2nd vec, 4th vec)
3426 The output for the first stage is:
3428 I1: 0 16 1 17 2 18 3 19
3429 I2: 4 20 5 21 6 22 7 23
3430 I3: 8 24 9 25 10 26 11 27
3431 I4: 12 28 13 29 14 30 15 31
3433 The output of the second stage, i.e. the final result is:
3435 I1: 0 8 16 24 1 9 17 25
3436 I2: 2 10 18 26 3 11 19 27
3437 I3: 4 12 20 28 5 13 21 29
3438 I4: 6 14 22 30 7 15 23 31. */
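/* For illustration only: a standalone sketch (not part of the
   vectorizer) of the same log2(LENGTH) interleaving, operating on
   arrays of element indices and using the big-endian numbering of the
   example above (element 0 is "high"; the generated code swaps the
   high/low optabs on little-endian targets).  */
#if 0
#include <stdlib.h>
#include <string.h>

static void
permute_store_chain_indices (int *vecs, int length, int nelt)
{
  /* VECS holds LENGTH vectors of NELT elements each, back to back.
     LENGTH and NELT are assumed to be powers of two.  */
  int *tmp = (int *) malloc (length * nelt * sizeof (int));
  int stage, j, k;

  for (stage = 1; stage < length; stage <<= 1)
    {
      for (j = 0; j < length / 2; j++)
        {
          int *v1 = vecs + j * nelt;                /* first half of chain  */
          int *v2 = vecs + (j + length / 2) * nelt; /* second half of chain */
          for (k = 0; k < nelt / 2; k++)
            {
              /* interleave_high fills slot 2j, interleave_low slot 2j+1.  */
              tmp[2 * j * nelt + 2 * k]           = v1[k];
              tmp[2 * j * nelt + 2 * k + 1]       = v2[k];
              tmp[(2 * j + 1) * nelt + 2 * k]     = v1[nelt / 2 + k];
              tmp[(2 * j + 1) * nelt + 2 * k + 1] = v2[nelt / 2 + k];
            }
        }
      memcpy (vecs, tmp, length * nelt * sizeof (int));
    }
  free (tmp);
}
#endif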
3441 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3442 unsigned int length,
3444 gimple_stmt_iterator *gsi,
3445 VEC(tree,heap) **result_chain)
3447 tree perm_dest, vect1, vect2, high, low;
3449 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3452 enum tree_code high_code, low_code;
3454 gcc_assert (vect_strided_store_supported (vectype, length));
3456 *result_chain = VEC_copy (tree, heap, dr_chain);
3458 for (i = 0; i < exact_log2 (length); i++)
3460 for (j = 0; j < length/2; j++)
3462 vect1 = VEC_index (tree, dr_chain, j);
3463 vect2 = VEC_index (tree, dr_chain, j+length/2);
3465 /* Create interleaving stmt:
3466 in the case of big endian:
3467 high = interleave_high (vect1, vect2)
3468 and in the case of little endian:
3469 high = interleave_low (vect1, vect2). */
3470 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3471 DECL_GIMPLE_REG_P (perm_dest) = 1;
3472 add_referenced_var (perm_dest);
3473 if (BYTES_BIG_ENDIAN)
3475 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3476 low_code = VEC_INTERLEAVE_LOW_EXPR;
3480 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3481 high_code = VEC_INTERLEAVE_LOW_EXPR;
3483 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3485 high = make_ssa_name (perm_dest, perm_stmt);
3486 gimple_assign_set_lhs (perm_stmt, high);
3487 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3488 VEC_replace (tree, *result_chain, 2*j, high);
3490 /* Create interleaving stmt:
3491 in the case of big endian:
3492 low = interleave_low (vect1, vect2)
3493 and in the case of little endian:
3494 low = interleave_high (vect1, vect2). */
3495 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3496 DECL_GIMPLE_REG_P (perm_dest) = 1;
3497 add_referenced_var (perm_dest);
3498 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3500 low = make_ssa_name (perm_dest, perm_stmt);
3501 gimple_assign_set_lhs (perm_stmt, low);
3502 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3503 VEC_replace (tree, *result_chain, 2*j+1, low);
3505 dr_chain = VEC_copy (tree, heap, *result_chain);
3509 /* Function vect_setup_realignment
3511 This function is called when vectorizing an unaligned load using
3512 the dr_explicit_realign[_optimized] scheme.
3513 This function generates the following code at the loop prolog:
3516 x msq_init = *(floor(p)); # prolog load
3517 realignment_token = call target_builtin;
3519 x msq = phi (msq_init, ---)
3521 The stmts marked with x are generated only for the case of
3522 dr_explicit_realign_optimized.
3524 The code above sets up a new (vector) pointer, pointing to the first
3525 location accessed by STMT, and a "floor-aligned" load using that pointer.
3526 It also generates code to compute the "realignment-token" (if the relevant
3527 target hook was defined), and creates a phi-node at the loop-header bb
3528 whose arguments are the result of the prolog-load (created by this
3529 function) and the result of a load that takes place in the loop (to be
3530 created by the caller to this function).
3532 For the case of dr_explicit_realign_optimized:
3533 The caller to this function uses the phi-result (msq) to create the
3534 realignment code inside the loop, and sets up the missing phi argument,
3537 msq = phi (msq_init, lsq)
3538 lsq = *(floor(p')); # load in loop
3539 result = realign_load (msq, lsq, realignment_token);
3541 For the case of dr_explicit_realign:
3543 msq = *(floor(p)); # load in loop
3545 lsq = *(floor(p')); # load in loop
3546 result = realign_load (msq, lsq, realignment_token);
3549 STMT - (scalar) load stmt to be vectorized. This load accesses
3550 a memory location that may be unaligned.
3551 BSI - place where new code is to be inserted.
3552 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3556 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3557 target hook, if defined.
3558 Return value - the result of the loop-header phi node. */
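/* Illustrative example of the realignment scheme: with 16-byte V4SI
   vectors and p == A + 4 for a 16-byte aligned A, the two aligned
   loads give msq == {a0,a1,a2,a3} (at A) and lsq == {a4,a5,a6,a7}
   (at A + 16); realign_load (msq, lsq, realignment_token) then selects
   the 16 bytes starting at p, producing {a1,a2,a3,a4}.  */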
3561 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3562 tree *realignment_token,
3563 enum dr_alignment_support alignment_support_scheme,
3565 struct loop **at_loop)
3567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3568 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3569 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3570 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3571 struct loop *loop = NULL;
3573 tree scalar_dest = gimple_assign_lhs (stmt);
3580 tree msq_init = NULL_TREE;
3583 tree msq = NULL_TREE;
3584 gimple_seq stmts = NULL;
3586 bool compute_in_loop = false;
3587 bool nested_in_vect_loop = false;
3588 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3589 struct loop *loop_for_initial_load = NULL;
3593 loop = LOOP_VINFO_LOOP (loop_vinfo);
3594 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3597 gcc_assert (alignment_support_scheme == dr_explicit_realign
3598 || alignment_support_scheme == dr_explicit_realign_optimized);
3600 /* We need to generate three things:
3601 1. the misalignment computation
3602 2. the extra vector load (for the optimized realignment scheme).
3603 3. the phi node for the two vectors from which the realignment is
3604 done (for the optimized realignment scheme). */
3606 /* 1. Determine where to generate the misalignment computation.
3608 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3609 calculation will be generated by this function, outside the loop (in the
3610 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3611 caller, inside the loop.
3613 Background: If the misalignment remains fixed throughout the iterations of
3614 the loop, then both realignment schemes are applicable, and also the
3615 misalignment computation can be done outside LOOP. This is because we are
3616 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3617 are a multiple of VS (the Vector Size), and therefore the misalignment in
3618 different vectorized LOOP iterations is always the same.
3619 The problem arises only if the memory access is in an inner-loop nested
3620 inside LOOP, which is now being vectorized using outer-loop vectorization.
3621 This is the only case when the misalignment of the memory access may not
3622 remain fixed throughout the iterations of the inner-loop (as explained in
3623 detail in vect_supportable_dr_alignment). In this case, not only is the
3624 optimized realignment scheme not applicable, but also the misalignment
3625 computation (and generation of the realignment token that is passed to
3626 REALIGN_LOAD) have to be done inside the loop.
3628 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3629 or not, which in turn determines if the misalignment is computed inside
3630 the inner-loop, or outside LOOP. */
3632 if (init_addr != NULL_TREE || !loop_vinfo)
3634 compute_in_loop = true;
3635 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3639 /* 2. Determine where to generate the extra vector load.
3641 For the optimized realignment scheme, instead of generating two vector
3642 loads in each iteration, we generate a single extra vector load in the
3643 preheader of the loop, and in each iteration reuse the result of the
3644 vector load from the previous iteration. In case the memory access is in
3645 an inner-loop nested inside LOOP, which is now being vectorized using
3646 outer-loop vectorization, we need to determine whether this initial vector
3647 load should be generated at the preheader of the inner-loop, or can be
3648 generated at the preheader of LOOP. If the memory access has no evolution
3649 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3650 to be generated inside LOOP (in the preheader of the inner-loop). */
3652 if (nested_in_vect_loop)
3654 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3655 bool invariant_in_outerloop =
3656 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3657 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3660 loop_for_initial_load = loop;
3662 *at_loop = loop_for_initial_load;
3664 if (loop_for_initial_load)
3665 pe = loop_preheader_edge (loop_for_initial_load);
3667 /* 3. For the case of the optimized realignment, create the first vector
3668 load at the loop preheader. */
3670 if (alignment_support_scheme == dr_explicit_realign_optimized)
3672 /* Create msq_init = *(floor(p1)) in the loop preheader */
3674 gcc_assert (!compute_in_loop);
3675 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3676 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3677 NULL_TREE, &init_addr, NULL, &inc,
3679 new_stmt = gimple_build_assign_with_ops
3680 (BIT_AND_EXPR, NULL_TREE, ptr,
3681 build_int_cst (TREE_TYPE (ptr),
3682 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3683 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3684 gimple_assign_set_lhs (new_stmt, new_temp);
3685 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3686 gcc_assert (!new_bb);
3688 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3689 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3690 new_stmt = gimple_build_assign (vec_dest, data_ref);
3691 new_temp = make_ssa_name (vec_dest, new_stmt);
3692 gimple_assign_set_lhs (new_stmt, new_temp);
3693 mark_symbols_for_renaming (new_stmt);
3696 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3697 gcc_assert (!new_bb);
3700 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3702 msq_init = gimple_assign_lhs (new_stmt);
3705 /* 4. Create realignment token using a target builtin, if available.
3706 It is done either inside the containing loop, or before LOOP (as
3707 determined above). */
3709 if (targetm.vectorize.builtin_mask_for_load)
3713 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3716 /* Generate the INIT_ADDR computation outside LOOP. */
3717 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3721 pe = loop_preheader_edge (loop);
3722 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3723 gcc_assert (!new_bb);
3726 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3729 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3730 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3732 vect_create_destination_var (scalar_dest,
3733 gimple_call_return_type (new_stmt));
3734 new_temp = make_ssa_name (vec_dest, new_stmt);
3735 gimple_call_set_lhs (new_stmt, new_temp);
3737 if (compute_in_loop)
3738 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3741 /* Generate the misalignment computation outside LOOP. */
3742 pe = loop_preheader_edge (loop);
3743 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3744 gcc_assert (!new_bb);
3747 *realignment_token = gimple_call_lhs (new_stmt);
3749 /* The result of the CALL_EXPR to this builtin is determined from
3750 the value of the parameter and no global variables are touched
3751 which makes the builtin a "const" function. Requiring the
3752 builtin to have the "const" attribute makes it unnecessary
3753 to call mark_call_clobbered. */
3754 gcc_assert (TREE_READONLY (builtin_decl));
3757 if (alignment_support_scheme == dr_explicit_realign)
3760 gcc_assert (!compute_in_loop);
3761 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3764 /* 5. Create msq = phi <msq_init, lsq> in loop */
3766 pe = loop_preheader_edge (containing_loop);
3767 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3768 msq = make_ssa_name (vec_dest, NULL);
3769 phi_stmt = create_phi_node (msq, containing_loop->header);
3770 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3771 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3777 /* Function vect_strided_load_supported.
3779 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3780 and FALSE otherwise. */
3783 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3785 optab perm_even_optab, perm_odd_optab;
3786 enum machine_mode mode;
3788 mode = TYPE_MODE (vectype);
3790 /* vect_permute_load_chain requires the group size to be a power of two. */
3791 if (exact_log2 (count) == -1)
3793 if (vect_print_dump_info (REPORT_DETAILS))
3794 fprintf (vect_dump, "the size of the group of strided accesses"
3795 " is not a power of 2");
3799 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3801 if (!perm_even_optab)
3803 if (vect_print_dump_info (REPORT_DETAILS))
3804 fprintf (vect_dump, "no optab for perm_even.");
3808 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3810 if (vect_print_dump_info (REPORT_DETAILS))
3811 fprintf (vect_dump, "perm_even op not supported by target.");
3815 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3817 if (!perm_odd_optab)
3819 if (vect_print_dump_info (REPORT_DETAILS))
3820 fprintf (vect_dump, "no optab for perm_odd.");
3824 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3826 if (vect_print_dump_info (REPORT_DETAILS))
3827 fprintf (vect_dump, "perm_odd op not supported by target.");
3834 /* Function vect_permute_load_chain.
3836 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3837 a power of 2, generate extract_even/odd stmts to reorder the input data
3838 correctly. Return the final references for loads in RESULT_CHAIN.
3840 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3841 The input is 4 vectors each containing 8 elements. We assign a number to each
3842 element, the input sequence is:
3844 1st vec: 0 1 2 3 4 5 6 7
3845 2nd vec: 8 9 10 11 12 13 14 15
3846 3rd vec: 16 17 18 19 20 21 22 23
3847 4th vec: 24 25 26 27 28 29 30 31
3849 The output sequence should be:
3851 1st vec: 0 4 8 12 16 20 24 28
3852 2nd vec: 1 5 9 13 17 21 25 29
3853 3rd vec: 2 6 10 14 18 22 26 30
3854 4th vec: 3 7 11 15 19 23 27 31
3856 i.e., the first output vector should contain the first elements of each
3857 interleaving group, etc.
3859 We use extract_even/odd instructions to create such output. The input of
3860 each extract_even/odd operation is two vectors:
3861 1st vec: 0 1 2 3
3862 2nd vec: 4 5 6 7
3864 and the output is the vector of extracted even/odd elements. The output of
3865 extract_even will be: 0 2 4 6
3866 and of extract_odd: 1 3 5 7
3869 The permutation is done in log LENGTH stages. In each stage extract_even
3870 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3871 their order. In our example,
3873 E1: extract_even (1st vec, 2nd vec)
3874 E2: extract_odd (1st vec, 2nd vec)
3875 E3: extract_even (3rd vec, 4th vec)
3876 E4: extract_odd (3rd vec, 4th vec)
3878 The output for the first stage will be:
3880 E1: 0 2 4 6 8 10 12 14
3881 E2: 1 3 5 7 9 11 13 15
3882 E3: 16 18 20 22 24 26 28 30
3883 E4: 17 19 21 23 25 27 29 31
3885 In order to proceed and create the correct sequence for the next stage (or
3886 for the correct output, if the second stage is the last one, as in our
3887 example), we first put the output of extract_even operation and then the
3888 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3889 The input for the second stage is:
3891 1st vec (E1): 0 2 4 6 8 10 12 14
3892 2nd vec (E3): 16 18 20 22 24 26 28 30
3893 3rd vec (E2): 1 3 5 7 9 11 13 15
3894 4th vec (E4): 17 19 21 23 25 27 29 31
3896 The output of the second stage:
3898 E1: 0 4 8 12 16 20 24 28
3899 E2: 2 6 10 14 18 22 26 30
3900 E3: 1 5 9 13 17 21 25 29
3901 E4: 3 7 11 15 19 23 27 31
3903 And RESULT_CHAIN after reordering:
3905 1st vec (E1): 0 4 8 12 16 20 24 28
3906 2nd vec (E3): 1 5 9 13 17 21 25 29
3907 3rd vec (E2): 2 6 10 14 18 22 26 30
3908 4th vec (E4): 3 7 11 15 19 23 27 31. */
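/* For illustration only: a scalar model of the permutation above,
   operating on arrays of element values instead of vector SSA names.
   NELT is the number of elements per vector (8 in the example above);
   the names here are made up and nothing below is part of the
   vectorizer itself.

     #include <string.h>

     void
     permute_load_chain_model (int chain[][8], int tmp[][8],
                               unsigned int length, unsigned int nelt)
     {
       unsigned int stage, nstages = 0, l, j, e;

       for (l = length; l > 1; l >>= 1)  // log2 (LENGTH) stages.
         nstages++;
       for (stage = 0; stage < nstages; stage++)
         {
           for (j = 0; j < length; j += 2)
             for (e = 0; e < nelt; e++)
               {
                 unsigned int ev = 2 * e, od = 2 * e + 1;
                 // extract_even: even elements of chain[j] ## chain[j+1],
                 // stored in the first half of the result chain.
                 tmp[j / 2][e]
                   = ev < nelt ? chain[j][ev] : chain[j + 1][ev - nelt];
                 // extract_odd: odd elements of the same concatenation,
                 // stored in the second half.
                 tmp[j / 2 + length / 2][e]
                   = od < nelt ? chain[j][od] : chain[j + 1][od - nelt];
               }
           // This stage's RESULT_CHAIN becomes the next stage's DR_CHAIN.
           memcpy (chain, tmp, length * sizeof *chain);
         }
     }

   Running this with LENGTH == 4 and the 32 consecutive element numbers
   shown above reproduces the final RESULT_CHAIN ordering documented
   there.  */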
static void
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
			 unsigned int length,
			 gimple stmt,
			 gimple_stmt_iterator *gsi,
			 VEC(tree,heap) **result_chain)
{
  tree perm_dest, data_ref, first_vect, second_vect;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  int i;
  unsigned int j;

  gcc_assert (vect_strided_load_supported (vectype, length));

  *result_chain = VEC_copy (tree, heap, dr_chain);
  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length; j += 2)
	{
	  first_vect = VEC_index (tree, dr_chain, j);
	  second_vect = VEC_index (tree, dr_chain, j+1);

	  /* data_ref = permute_even (first_data_ref, second_data_ref);  */
	  perm_dest = create_tmp_var (vectype, "vect_perm_even");
	  DECL_GIMPLE_REG_P (perm_dest) = 1;
	  add_referenced_var (perm_dest);

	  perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
						    perm_dest, first_vect,
						    second_vect);
	  data_ref = make_ssa_name (perm_dest, perm_stmt);
	  gimple_assign_set_lhs (perm_stmt, data_ref);
	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	  mark_symbols_for_renaming (perm_stmt);

	  /* The extracted even elements fill the first half of
	     RESULT_CHAIN.  */
	  VEC_replace (tree, *result_chain, j/2, data_ref);

	  /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
	  perm_dest = create_tmp_var (vectype, "vect_perm_odd");
	  DECL_GIMPLE_REG_P (perm_dest) = 1;
	  add_referenced_var (perm_dest);

	  perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
						    perm_dest, first_vect,
						    second_vect);
	  data_ref = make_ssa_name (perm_dest, perm_stmt);
	  gimple_assign_set_lhs (perm_stmt, data_ref);
	  vect_finish_stmt_generation (stmt, perm_stmt, gsi);
	  mark_symbols_for_renaming (perm_stmt);

	  /* The extracted odd elements fill the second half.  */
	  VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
	}
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
}
/* Function vect_transform_strided_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build
   statements to perform their permutation and ascribe the result
   vectorized statements to the scalar statements.  */

void
vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
			     gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  gimple next_stmt, new_stmt;
  VEC(tree,heap) *result_chain = NULL;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains
     permuted vectors that are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);
  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
	break;

      /* Skip the gaps.  Loads created for the gaps will be removed by the
	 dead code elimination pass later.  No need to check for the first
	 stmt in the group, since it always exists.
	 DR_GROUP_GAP is the number of steps in elements from the previous
	 access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
	 correspond to the gaps.  */
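      /* E.g. (illustrative): for a group built from the accesses a[4*i]
	 and a[4*i+2] with group size 4, DR_GROUP_GAP of the second
	 statement is 2, so one RESULT_CHAIN entry (the dead load
	 generated for the gap at a[4*i+1]) is consumed here without
	 being attached to any scalar statement.  */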
      if (next_stmt != first_stmt
	  && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
	{
	  gap_count++;
	  continue;
	}

      while (next_stmt)
	{
	  new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
	  /* We assume that if VEC_STMT is not NULL, this is a case of
	     multiple copies, and we put the new vector statement in the
	     first available RELATED_STMT.  */
	  if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
	    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
	  else
	    {
	      if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
		{
		  gimple prev_stmt =
		    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
		  gimple rel_stmt =
		    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
		  while (rel_stmt)
		    {
		      prev_stmt = rel_stmt;
		      rel_stmt =
			STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
		    }

		  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
		    new_stmt;
		}
	    }

	  next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
	  gap_count = 1;
	  /* If NEXT_STMT accesses the same DR as the previous statement,
	     put the same TMP_DATA_REF as its vectorized statement; otherwise
	     get the next data-ref from RESULT_CHAIN.  */
	  if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
	    break;
	}
    }

  VEC_free (tree, heap, result_chain);
}
/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of DECL can be forced to be aligned
   on an ALIGNMENT-bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;
  if (DECL_EXTERNAL (decl))
    return false;
  if (TREE_ASM_WRITTEN (decl))
    return false;
  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  return (alignment <= MAX_STACK_ALIGNMENT);
}
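/* E.g. (illustrative): for a file-scope "static float a[256];" the
   vectorizer may raise DECL_ALIGN up to MAX_OFILE_ALIGNMENT, as if the
   user had written

     static float a[256] __attribute__ ((aligned (16)));

   (the attribute takes bytes; DECL_ALIGN is in bits), whereas an extern
   decl, or one whose assembly has already been written out, must keep
   whatever alignment it was given.  */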
/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
			       bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop,
     it is guaranteed that the misalignment amount remains the same
     throughout the execution of the vectorized loop.  Therefore, we can
     create the "realignment token" (the permutation mask that is passed to
     REALIGN_LOAD) at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now
     being vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same
     throughout the execution of the inner-loop.  This is because the
     inner-loop advances with the original scalar step (and not in steps of
     VS).  If the inner-loop step happens to be a multiple of VS, then the
     misalignment remains fixed and we can use the optimized realignment
     scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment
     cannot be optimized (as illustrated in the following pseudo vectorized
     loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      }  */
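  /* For illustration only: one REALIGN_LOAD modeled in scalar C, assuming
     the realignment token is a simple element offset OFF (real targets
     encode the token differently, e.g. as a byte-permute mask):

       // va = REALIGN_LOAD <v1, v2, rt>: the VS elements that start OFF
       // elements into the concatenation v1 ## v2.
       void
       realign_load_model (const int *v1, const int *v2, unsigned int off,
                           int *va, unsigned int vs)
       {
         unsigned int e;
         for (e = 0; e < vs; e++)
           va[e] = e + off < vs ? v1[e + off] : v2[e + off - vs];
       }

     Nothing above is part of the vectorizer; it only shows why the token
     can be computed once in the preheader when the misalignment is loop
     invariant.  */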
  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = TREE_TYPE (DR_REF (dr));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	{
	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
	  if ((nested_in_vect_loop
	       && (TREE_INT_CST_LOW (DR_STEP (dr))
		   != GET_MODE_SIZE (TYPE_MODE (vectype))))
	      || !loop_vinfo)
	    return dr_explicit_realign;
	  else
	    return dr_explicit_realign_optimized;
	}

      if (!known_alignment_for_access_p (dr))
	{
	  tree ba = DR_BASE_OBJECT (dr);

	  if (ba)
	    is_packed = contains_packed_reference (ba);
	}

      if (targetm.vectorize.
	  support_vector_misalignment (mode, type,
				       DR_MISALIGNMENT (dr), is_packed))
	/* Can't software pipeline the loads, but can at least do them.  */
	return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = TREE_TYPE (DR_REF (dr));

      if (!known_alignment_for_access_p (dr))
	{
	  tree ba = DR_BASE_OBJECT (dr);

	  if (ba)
	    is_packed = contains_packed_reference (ba);
	}

      if (targetm.vectorize.
	  support_vector_misalignment (mode, type,
				       DR_MISALIGNMENT (dr), is_packed))
	return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}