/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "tm_p.h"
#include "target.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"
/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  limit_p = !targetm.array_mode_supported_p (mode, count);
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
                              MODE_INT, limit_p);

  if (array_mode == BLKmode)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
                 GET_MODE_NAME (mode), count);
      return false;
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "cannot use %s<%s><%s>",
                 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "can use %s<%s><%s>",
             name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));

  return true;
}
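/* Illustrative example (added for exposition, not part of the original
   sources): a caller can ask whether three vectors of some V4SI-like type
   can be loaded as one lane group, e.g.

     vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
                                   vectype, 3);

   This looks for a 3 * 16 = 48-byte integer array mode (BLKmode if the
   target defines none and array_mode_supported_p rejects it) and then for
   a handler of the conversion optab from that array mode to the vector
   mode.  The count of 3 and the vectype are hypothetical.  */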
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different from the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.

   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
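/* Illustrative example (added for exposition): for a promotion stmt such as

     int_y = (int) short_x;

   LHS_SIZE_UNIT is 4 and RHS_SIZE_UNIT is 2 (on a typical ILP32/LP64
   target), and the function returns the short type, so the VF is derived
   from the 2-byte element rather than the 4-byte one.  */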
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
          GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
  GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller than
   DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT(DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
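/* Illustrative example (added for exposition): suppose DRA accesses a[4]
   and is not yet in any chain, while DRB accesses a[0] and already heads
   the chain a[0] -> a[8].  This is case 2 above: DRA inherits
   FIRST_DR = a[0] and is inserted by init, giving a[0] -> a[4] -> a[8].
   The array and offsets are hypothetical.  */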
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
      GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
      GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
    {
      gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                            old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
          GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
  first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = GROUP_FIRST_ELEMENT (stmtinfo_a);
      prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = GROUP_FIRST_ELEMENT (stmtinfo_b);
      prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
              GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
          GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
      node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
    }
}
/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share same bases and offsets, we can compare their initial
   constant offsets to decide whether they differ or not.  In case of a read-
   write dependence we check that the load is before the store to ensure that
   vectorization will not change the order of the accesses.  */

static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we verify
     it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
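/* Illustrative example (added for exposition): in a basic block containing

     x = a[0];    // load
     a[0] = y;    // store to the same location

   the load precedes the store, so the pair is reported as not dependent
   (vectorization keeps the vector load before the vector store).  If the
   store came first, the function would report a dependence.  */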
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb)
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference between
        data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}
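/* Illustrative example (added for exposition): for accesses a[2*i] and
   a[2*i+1] with 4-byte elements, both DRs have step 8 and inits 0 and 4.
   The init difference 4 is a multiple of the type size and not greater
   than the step, so the two refs form an interleaving chain of size 2.  */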
/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
          && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
          && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
              == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}
/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
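/* Illustrative sketch (added for exposition): for each DDR recorded here
   the transformation phase later emits a runtime overlap test and versions
   the loop, conceptually

     if (a + len <= b || b + len <= a)
       vectorized_loop ();
     else
       scalar_loop ();

   where a, b and len stand for the two access ranges; the names are
   hypothetical.  */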
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf,
                                  bool *data_dependence_in_bb)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                                  "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block unknown dependence can still mean
         strided access.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* We deal with read-write dependencies in basic blocks later (by
         verifying that all the loads in the basic block are before all the
         stores).  */
      *data_dependence_in_bb = true;
      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is inapplicable, hence, in case of known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is store.  */
          if (DR_IS_READ (dra))
            GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                              "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
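/* Worked example (added for exposition): for

     for (i = 3; i < N; i++)
       a[i] = a[i-3] + 1;

   the dependence distance is 3.  With an initial *max_vf of, say, 8, the
   code above lowers *max_vf to 3; the subsequent abs (dist) >= *max_vf
   test then treats the dependence as harmless for any VF <= 3.  */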
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   bb_vec_info bb_vinfo, int *max_vf,
                                   bool *data_dependence_in_bb)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = NULL;
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (loop_vinfo)
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  else
    ddrs = BB_VINFO_DDRS (bb_vinfo);

  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
                                          data_dependence_in_bb))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     divisible by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                    TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
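/* Worked example (added for exposition): assume a 16-byte vectype and a
   4-byte int access a[i+1] whose base a is 16-byte aligned.  Then
   misalign = DR_INIT = 4 and alignment = 16, so the FLOOR_MOD above yields
   DR_MISALIGNMENT (dr) = 4 bytes.  A backward-running DR with step -4 and
   four elements per vector would first be adjusted by (4-1) * (-4) = -12
   before the modulo.  */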
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
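/* Worked example (added for exposition): with 16-byte vectors, a 4-byte
   element (dr_size = 4), DR_MISALIGNMENT (dr) = 8 and npeel = 1, a
   forward DR becomes (8 + 1*4) & 15 = 12, while a backward DR becomes
   (8 - 4) & 15 = 4.  */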
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");

              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
        is_packed = true;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
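/* Worked example (added for exposition): for an interleaved group of size
   2 with four 4-byte elements per vector (elem_size = 4) and
   DR_MISALIGNMENT = 4, mis_in_elements = 1 and (4 - 1) % 2 != 0, so
   peeling cannot align the group and the function returns false.  With
   DR_MISALIGNMENT = 8 the prolog would peel 2 iterations, exactly one
   group, and alignment is reachable.  */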
/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  if (!supportable_dr_alignment)
    *inside_cost = VECT_MAX_COST;
  else
    {
      if (DR_IS_READ (dr))
        vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
      else
        vect_get_store_cost (dr, ncopies, inside_cost);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
             "outside_cost = %d.", *inside_cost, *outside_cost);
}
static hashval_t
vect_peeling_hash (const void *elem)
{
  const struct _vect_peel_info *peel_info;

  peel_info = (const struct _vect_peel_info *) elem;
  return (hashval_t) peel_info->npeel;
}


static int
vect_peeling_hash_eq (const void *elem1, const void *elem2)
{
  const struct _vect_peel_info *a, *b;

  a = (const struct _vect_peel_info *) elem1;
  b = (const struct _vect_peel_info *) elem2;
  return (a->npeel == b->npeel);
}


/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  void **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                                     &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
                                 INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}
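/* Illustrative example (added for exposition): if three DRs become aligned
   by peeling 2 iterations and one DR by peeling 6, the table ends up with
   {npeel 2 -> count 3, npeel 6 -> count 1}; without the cost model the
   most-frequent traversal below would pick npeel = 2.  */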
/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

static int
vect_peeling_hash_get_most_frequent (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info max = (vect_peel_extended_info) data;

  if (elem->count > max->peel_info.count
      || (elem->count == max->peel_info.count
          && max->peel_info.npeel > elem->npeel))
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}


/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
                      vect_get_single_scalar_iteraion_cost (loop_vinfo));

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }

  return 1;
}
/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel)
{
   struct _vect_peel_extended_info res;

   res.peel_info.dr = NULL;

   if (flag_vect_cost_model)
     {
       res.inside_cost = INT_MAX;
       res.outside_cost = INT_MAX;
       htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                      vect_peeling_hash_get_lowest_cost, &res);
     }
   else
     {
       res.peel_info.count = 0;
       htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                      vect_peeling_hash_get_most_frequent, &res);
     }

   *npeel = res.peel_info.npeel;
   return res.peel_info.dr;
}
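/* Usage sketch (added for exposition): vect_enhance_data_refs_alignment
   below first records every candidate peeling amount with
   vect_peeling_hash_insert while scanning the data-refs, then calls this
   function once to obtain the DR to align and the corresponding npeel.  */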
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){    # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){    # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
        for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
          x = q[i];
          p[i] = y;
        }
        for (i = 3; i < N; i++){        # loop 2A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 3: combination of loop peeling and versioning:
        for (i = 0; i < 3; i++){        # (scalar loop, not to be vectorized).
          x = q[i];
          p[i] = y;
        }
        if (p is aligned) {
          for (i = 3; i<N; i++){        # loop 3A
            x = q[i];                   # DR_MISALIGNMENT(q) = 0
            p[i] = y;                   # DR_MISALIGNMENT(p) = 0
          }
        }
        else {
          for (i = 3; i<N; i++){        # loop 3B
            x = q[i];                   # DR_MISALIGNMENT(q) = 0
            p[i] = y;                   # DR_MISALIGNMENT(p) = unaligned
          }
        }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
                          htab_create (1, vect_peeling_hash,
                                       vect_peeling_hash_eq, free);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis))
                          & (nelements - 1);

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  Vectorization factor will be 8.  If
                 both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know all the misalignment values, we prefer
                 peeling for data-ref that has maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over loads.  */
              if (all_misalignments_unknown)
                {
                  if (same_align_drs_max < VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
                      || !dr0)
                    {
                      same_align_drs_max = VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
                      dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose peeling amount according to the known
                 accesses.  */

              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "vector alignment may not be reachable");

              break;
            }
        }
    }
  vect_versioning_for_alias_required
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;

          vect_get_data_access_cost (dr0, &load_inside_cost,
                                     &load_outside_cost);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost);

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (first_store))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (dr0))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments, use
         peeling only if it may help to align other accesses in the loop.  */
      if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                           (vinfo_for_stmt (DR_STMT (dr0))))
          && vect_supportable_dr_alignment (dr0, false)
             != dr_unaligned_supported)
        do_peeling = false;
    }
  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not supported
         unless aligned.  So we try to choose the best possible peeling.  */

      /* We should get here only if there are drs with known misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
      if (!dr0 || !npeel)
        do_peeling = false;
    }

  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          bool negative = tree_int_cst_compare (DR_STEP (dr0),
                                                size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = ((negative ? mis - nelements : nelements - mis)
                       & (nelements - 1));
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
            npeel /= GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            return stat;
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }
  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (gimple,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (gimple, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(gimple,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
  return stat;
}
1926 /* Function vect_find_same_alignment_drs.
1928 Update group and alignment relations according to the chosen
1929 vectorization factor. */
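/* For example (an illustrative case, not from the original sources): with
   a vectorization factor of 4 and two int references a[i] and a[i+4], the
   dependence distance is 4.  Since 4 % VF == 0 and both references have
   the same size, they always land in the same position within a vector
   and are therefore recorded as having the same alignment.  */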
1932 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1933 loop_vec_info loop_vinfo)
1936 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1937 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1938 struct data_reference *dra = DDR_A (ddr);
1939 struct data_reference *drb = DDR_B (ddr);
1940 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1941 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1942 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1943 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1944 lambda_vector dist_v;
1945 unsigned int loop_depth;
1947 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1953 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1956 /* Loop-based vectorization and known data dependence. */
1957 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1960 /* Data-dependence analysis reports a distance vector of zero
1961 for data-references that overlap only in the first iteration
1962 but have steps of opposite sign (see PR45764).
1963 So as a sanity check, require equal DR_STEP. */
1964 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1967 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1968 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1970 int dist = dist_v[loop_depth];
1972 if (vect_print_dump_info (REPORT_DR_DETAILS))
1973 fprintf (vect_dump, "dependence distance = %d.", dist);
1975 /* Same loop iteration. */
1977 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1979 /* Two references with distance zero have the same alignment. */
1980 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1981 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1982 if (vect_print_dump_info (REPORT_ALIGNMENT))
1983 fprintf (vect_dump, "accesses have the same alignment.");
1984 if (vect_print_dump_info (REPORT_DR_DETAILS))
1986 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1987 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1988 fprintf (vect_dump, " and ");
1989 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1996 /* Function vect_analyze_data_refs_alignment
1998 Analyze the alignment of the data-references in the loop.
1999 Return FALSE if a data reference is found that cannot be vectorized. */
2002 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2003 bb_vec_info bb_vinfo)
2005 if (vect_print_dump_info (REPORT_DETAILS))
2006 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2008 /* Mark groups of data references with the same alignment using
2009 data dependence information. */
2012 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2013 struct data_dependence_relation *ddr;
2016 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2017 vect_find_same_alignment_drs (ddr, loop_vinfo);
2020 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2022 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2024 "not vectorized: can't calculate alignment for data ref.");
2032 /* Analyze groups of strided accesses: check that DR belongs to a group of
2033 strided accesses of legal size, step, etc. Detect gaps, single element
2034 interleaving, and other special cases. Set strided access info.
2035 Collect groups of strided stores for further use in SLP analysis. */
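/* As an illustration (a sketch, not from the original sources), the loop

       for (i = 0; i < n; i++)
         {
           out[3*i]     = in[3*i];
           out[3*i + 1] = in[3*i + 1];
           out[3*i + 2] = in[3*i + 2];
         }

   contains two interleaving groups of size 3; each data-ref in them has
   DR_STEP equal to 3 * sizeof (type), so the stride computed below is 3.  */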
2038 vect_analyze_group_access (struct data_reference *dr)
2040 tree step = DR_STEP (dr);
2041 tree scalar_type = TREE_TYPE (DR_REF (dr));
2042 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2043 gimple stmt = DR_STMT (dr);
2044 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2045 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2046 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2047 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2048 HOST_WIDE_INT stride, last_accessed_element = 1;
2049 bool slp_impossible = false;
2051 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2052 interleaving group (including gaps). */
2053 stride = dr_step / type_size;
2055 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2056 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2058 /* Check whether this DR is part of an interleaving group, and is a single
2059 element of the group that is accessed in the loop. */
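/* E.g. (an illustrative sketch): a loop that only reads

       ... = in[2*i];

   accesses one element out of every group of two; DR_STEP is
   2 * sizeof (type), STRIDE is 2, and there is a gap of one element
   between consecutive accesses.  */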
2061 /* Gaps are supported only for loads. STEP must be a multiple of the type
2062 size. The size of the group must be a power of 2. */
2064 && (dr_step % type_size) == 0
2066 && exact_log2 (stride) != -1)
2068 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2069 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2070 if (vect_print_dump_info (REPORT_DR_DETAILS))
2072 fprintf (vect_dump, "Detected single element interleaving ");
2073 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2074 fprintf (vect_dump, " step ");
2075 print_generic_expr (vect_dump, step, TDF_SLIM);
2080 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2082 if (vect_print_dump_info (REPORT_DETAILS))
2083 fprintf (vect_dump, "Data access with gaps requires scalar epilogue loop");
2090 if (vect_print_dump_info (REPORT_DETAILS))
2092 fprintf (vect_dump, "not consecutive access ");
2093 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2098 /* Mark the statement as unvectorizable. */
2099 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2106 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2108 /* First stmt in the interleaving chain. Check the chain. */
2109 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2110 struct data_reference *data_ref = dr;
2111 unsigned int count = 1;
2113 tree prev_init = DR_INIT (data_ref);
2115 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2119 /* Skip same data-refs. If two or more stmts share a data-ref
2120 (supported only for loads), we vectorize only the first stmt,
2121 and the rest get their vectorized loads from the first one. */
2123 if (!tree_int_cst_compare (DR_INIT (data_ref),
2124 DR_INIT (STMT_VINFO_DATA_REF (
2125 vinfo_for_stmt (next)))))
2127 if (DR_IS_WRITE (data_ref))
2129 if (vect_print_dump_info (REPORT_DETAILS))
2130 fprintf (vect_dump, "Two store stmts share the same dr.");
2134 /* Check that there are no load-store dependencies for these loads,
2135 to prevent a load-store-load sequence to the same location. */
2136 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2137 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2139 if (vect_print_dump_info (REPORT_DETAILS))
2141 "READ_WRITE dependence in interleaving.");
2145 /* For load use the same data-ref load. */
2146 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2149 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2155 /* Check that all the accesses have the same STEP. */
2156 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2157 if (tree_int_cst_compare (step, next_step))
2159 if (vect_print_dump_info (REPORT_DETAILS))
2160 fprintf (vect_dump, "not consecutive access in interleaving");
2164 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2165 /* Check that the distance between two accesses is equal to the type
2166 size. Otherwise, we have gaps. */
2167 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2168 - TREE_INT_CST_LOW (prev_init)) / type_size;
2171 /* FORNOW: SLP of accesses with gaps is not supported. */
2172 slp_impossible = true;
2173 if (DR_IS_WRITE (data_ref))
2175 if (vect_print_dump_info (REPORT_DETAILS))
2176 fprintf (vect_dump, "interleaved store with gaps");
2183 last_accessed_element += diff;
2185 /* Store the gap from the previous member of the group. If there is no
2186 gap in the access, GROUP_GAP is always 1. */
2187 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2189 prev_init = DR_INIT (data_ref);
2190 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2191 /* Count the number of data-refs in the chain. */
2195 /* COUNT is the number of accesses found; we multiply it by the size of
2196 the type to get COUNT_IN_BYTES. */
2197 count_in_bytes = type_size * count;
2199 /* Check that the size of the interleaving (including gaps) is not
2200 greater than STEP. */
2201 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2203 if (vect_print_dump_info (REPORT_DETAILS))
2205 fprintf (vect_dump, "interleaving size is greater than step for ");
2206 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2211 /* Check that the size of the interleaving is equal to STEP for stores,
2212 i.e., that there are no gaps. */
2213 if (dr_step && dr_step != count_in_bytes)
2215 if (DR_IS_READ (dr))
2217 slp_impossible = true;
2218 /* There is a gap after the last load in the group. This gap is the
2219 difference between the stride and the number of elements. When
2220 there is no gap, this difference should be 0. */
2221 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
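	      /* For instance (illustrative): a load group accessing
	         in[4*i], in[4*i + 1] and in[4*i + 2] has STRIDE 4 but
	         COUNT 3, so the gap recorded here is 1 - one unused
	         element at the end of every group.  */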
2225 if (vect_print_dump_info (REPORT_DETAILS))
2226 fprintf (vect_dump, "interleaved store with gaps");
2231 /* Check that STEP is a multiple of type size. */
2232 if (dr_step && (dr_step % type_size) != 0)
2234 if (vect_print_dump_info (REPORT_DETAILS))
2236 fprintf (vect_dump, "step is not a multiple of type size: step ");
2237 print_generic_expr (vect_dump, step, TDF_SLIM);
2238 fprintf (vect_dump, " size ");
2239 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2248 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2249 if (vect_print_dump_info (REPORT_DETAILS))
2250 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2252 /* SLP: create an SLP data structure for every interleaving group of
2253 stores for further analysis in vect_analyze_slp. */
2254 if (DR_IS_WRITE (dr) && !slp_impossible)
2257 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2260 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2264 /* There is a gap at the end of the group. */
2265 if (stride - last_accessed_element > 0 && loop_vinfo)
2267 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2268 if (vect_print_dump_info (REPORT_DETAILS))
2269 fprintf (vect_dump, "Data access with gaps requires scalar epilogue loop");
2278 /* Analyze the access pattern of the data-reference DR.
2279 In the case of non-consecutive accesses, call vect_analyze_group_access() to
2280 analyze groups of strided accesses. */
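/* For example (illustrative): an access a[i] whose DR_STEP equals
   sizeof (a[0]) is treated below as consecutive and needs no group
   analysis, whereas a[2*i] has DR_STEP 2 * sizeof (a[0]) and is handed
   to vect_analyze_group_access.  */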
2283 vect_analyze_data_ref_access (struct data_reference *dr)
2285 tree step = DR_STEP (dr);
2286 tree scalar_type = TREE_TYPE (DR_REF (dr));
2287 gimple stmt = DR_STMT (dr);
2288 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2289 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2290 struct loop *loop = NULL;
2291 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2294 loop = LOOP_VINFO_LOOP (loop_vinfo);
2296 if (loop_vinfo && !step)
2298 if (vect_print_dump_info (REPORT_DETAILS))
2299 fprintf (vect_dump, "bad data-ref access in loop");
2303 /* Don't allow invariant accesses in loops. */
2304 if (loop_vinfo && dr_step == 0)
2307 if (loop && nested_in_vect_loop_p (loop, stmt))
2309 /* Interleaved accesses are not yet supported within outer-loop
2310 vectorization for references in the inner-loop. */
2311 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2313 /* For the rest of the analysis we use the outer-loop step. */
2314 step = STMT_VINFO_DR_STEP (stmt_info);
2315 dr_step = TREE_INT_CST_LOW (step);
2319 if (vect_print_dump_info (REPORT_ALIGNMENT))
2320 fprintf (vect_dump, "zero step in outer loop.");
2321 if (DR_IS_READ (dr))
2329 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2331 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2333 /* Mark that it is not interleaving. */
2334 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2338 if (loop && nested_in_vect_loop_p (loop, stmt))
2340 if (vect_print_dump_info (REPORT_ALIGNMENT))
2341 fprintf (vect_dump, "strided access in outer loop.");
2345 /* Not a consecutive access - check whether it is part of an interleaving group. */
2346 return vect_analyze_group_access (dr);
2350 /* Function vect_analyze_data_ref_accesses.
2352 Analyze the access pattern of all the data references in the loop.
2354 FORNOW: the only access pattern that is considered vectorizable is a
2355 simple step 1 (consecutive) access.
2357 FORNOW: handle only arrays and pointer accesses. */
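/* E.g. (an illustrative sketch): in

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

   all three data-refs advance by exactly one element per iteration -
   the simple consecutive pattern this analysis accepts.  */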
2360 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2363 VEC (data_reference_p, heap) *datarefs;
2364 struct data_reference *dr;
2366 if (vect_print_dump_info (REPORT_DETAILS))
2367 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2370 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2372 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2374 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2375 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2376 && !vect_analyze_data_ref_access (dr))
2378 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2379 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2383 /* Mark the statement as not vectorizable. */
2384 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2394 /* Function vect_prune_runtime_alias_test_list.
2396 Prune a list of ddrs to be tested at run-time by versioning for alias.
2397 Return FALSE if the resulting list of ddrs is longer than allowed by
2398 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
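/* Illustrative sketch (the shape of the test only; a, b and segment_len
   are hypothetical names): each ddr kept in the list leads to one runtime
   non-overlap check of the form

       if (a + segment_len <= b || b + segment_len <= a)
         ... vectorized loop ...
       else
         ... scalar loop ...

   so two ddrs covering equal address ranges would emit the same check
   twice; the loop below removes such duplicates.  */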
2401 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2403 VEC (ddr_p, heap) * ddrs =
2404 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2407 if (vect_print_dump_info (REPORT_DETAILS))
2408 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2410 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2415 ddr_i = VEC_index (ddr_p, ddrs, i);
2418 for (j = 0; j < i; j++)
2420 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2422 if (vect_vfa_range_equal (ddr_i, ddr_j))
2424 if (vect_print_dump_info (REPORT_DR_DETAILS))
2426 fprintf (vect_dump, "found equal ranges ");
2427 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2428 fprintf (vect_dump, ", ");
2429 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2430 fprintf (vect_dump, " and ");
2431 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2432 fprintf (vect_dump, ", ");
2433 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2442 VEC_ordered_remove (ddr_p, ddrs, i);
2448 if (VEC_length (ddr_p, ddrs) >
2449 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2451 if (vect_print_dump_info (REPORT_DR_DETAILS))
2454 "disable versioning for alias - max number of generated "
2455 "checks exceeded.");
2458 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2467 /* Function vect_analyze_data_refs.
2469 Find all the data references in the loop or basic block.
2471 The general structure of the analysis of data refs in the vectorizer is as follows:
2473 1- vect_analyze_data_refs(loop/bb): call
2474 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2475 in the loop/bb and their dependences.
2476 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2477 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2478 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2483 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2484 bb_vec_info bb_vinfo,
2487 struct loop *loop = NULL;
2488 basic_block bb = NULL;
2490 VEC (data_reference_p, heap) *datarefs;
2491 struct data_reference *dr;
2495 if (vect_print_dump_info (REPORT_DETAILS))
2496 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2500 loop = LOOP_VINFO_LOOP (loop_vinfo);
2501 res = compute_data_dependences_for_loop
2503 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2504 &LOOP_VINFO_DATAREFS (loop_vinfo),
2505 &LOOP_VINFO_DDRS (loop_vinfo));
2509 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2510 fprintf (vect_dump, "not vectorized: loop contains function calls"
2511 " or data references that cannot be analyzed");
2515 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2519 bb = BB_VINFO_BB (bb_vinfo);
2520 res = compute_data_dependences_for_bb (bb, true,
2521 &BB_VINFO_DATAREFS (bb_vinfo),
2522 &BB_VINFO_DDRS (bb_vinfo));
2525 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2526 fprintf (vect_dump, "not vectorized: basic block contains function"
2527 " calls or data references that cannot be analyzed");
2531 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2534 /* Go through the data-refs, check that the analysis succeeded. Update
2535 pointer from stmt_vec_info struct to DR and vectype. */
2537 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2540 stmt_vec_info stmt_info;
2541 tree base, offset, init;
2544 if (!dr || !DR_REF (dr))
2546 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2547 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2551 stmt = DR_STMT (dr);
2552 stmt_info = vinfo_for_stmt (stmt);
2554 /* Check that analysis of the data-ref succeeded. */
2555 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2558 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2560 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2561 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2566 /* Mark the statement as not vectorizable. */
2567 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2574 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2576 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2577 fprintf (vect_dump, "not vectorized: base addr of dr is a constant");
2581 /* Mark the statement as not vectorizable. */
2582 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2589 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2591 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2593 fprintf (vect_dump, "not vectorized: volatile type ");
2594 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2599 base = unshare_expr (DR_BASE_ADDRESS (dr));
2600 offset = unshare_expr (DR_OFFSET (dr));
2601 init = unshare_expr (DR_INIT (dr));
2603 if (stmt_can_throw_internal (stmt))
2605 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2607 fprintf (vect_dump, "not vectorized: statement can throw an exception ");
2609 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2614 /* Update DR field in stmt_vec_info struct. */
2616 /* If the dataref is in an inner-loop of the loop that is considered
2617 for vectorization, we also want to analyze the access relative to
2618 the outer-loop (DR contains information only relative to the
2619 inner-most enclosing loop). We do that by building a reference to the
2620 first location accessed by the inner-loop, and analyze it relative to
2622 if (loop && nested_in_vect_loop_p (loop, stmt))
2624 tree outer_step, outer_base, outer_init;
2625 HOST_WIDE_INT pbitsize, pbitpos;
2627 enum machine_mode pmode;
2628 int punsignedp, pvolatilep;
2629 affine_iv base_iv, offset_iv;
2632 /* Build a reference to the first location accessed by the
2633 inner-loop: *(BASE+INIT). (The first location is actually
2634 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2635 tree inner_base = build_fold_indirect_ref
2636 (fold_build2 (POINTER_PLUS_EXPR,
2637 TREE_TYPE (base), base,
2638 fold_convert (sizetype, init)));
2640 if (vect_print_dump_info (REPORT_DETAILS))
2642 fprintf (vect_dump, "analyze in outer-loop: ");
2643 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2646 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2647 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2648 gcc_assert (outer_base != NULL_TREE);
2650 if (pbitpos % BITS_PER_UNIT != 0)
2652 if (vect_print_dump_info (REPORT_DETAILS))
2653 fprintf (vect_dump, "failed: bit offset alignment.\n");
2657 outer_base = build_fold_addr_expr (outer_base);
2658 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2661 if (vect_print_dump_info (REPORT_DETAILS))
2662 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2669 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2677 offset_iv.base = ssize_int (0);
2678 offset_iv.step = ssize_int (0);
2680 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2683 if (vect_print_dump_info (REPORT_DETAILS))
2684 fprintf (vect_dump, "evolution of offset is not affine.\n");
2688 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2689 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2690 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2691 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2692 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2694 outer_step = size_binop (PLUS_EXPR,
2695 fold_convert (ssizetype, base_iv.step),
2696 fold_convert (ssizetype, offset_iv.step));
2698 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2699 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2700 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2701 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2702 STMT_VINFO_DR_OFFSET (stmt_info) =
2703 fold_convert (ssizetype, offset_iv.base);
2704 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2705 size_int (highest_pow2_factor (offset_iv.base));
2707 if (vect_print_dump_info (REPORT_DETAILS))
2709 fprintf (vect_dump, "\touter base_address: ");
2710 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2711 fprintf (vect_dump, "\n\touter offset from base address: ");
2712 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2713 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2714 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2715 fprintf (vect_dump, "\n\touter step: ");
2716 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2717 fprintf (vect_dump, "\n\touter aligned to: ");
2718 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2722 if (STMT_VINFO_DATA_REF (stmt_info))
2724 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2727 "not vectorized: more than one data ref in stmt: ");
2728 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2733 STMT_VINFO_DATA_REF (stmt_info) = dr;
2735 /* Set vectype for STMT. */
2736 scalar_type = TREE_TYPE (DR_REF (dr));
2737 STMT_VINFO_VECTYPE (stmt_info) =
2738 get_vectype_for_scalar_type (scalar_type);
2739 if (!STMT_VINFO_VECTYPE (stmt_info))
2741 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2744 "not vectorized: no vectype for stmt: ");
2745 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2746 fprintf (vect_dump, " scalar_type: ");
2747 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2752 /* Mark the statement as not vectorizable. */
2753 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2760 /* Adjust the minimal vectorization factor according to the vector type.  */
2762 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2771 /* Function vect_get_new_vect_var.
2773 Returns a name for a new variable.  The current naming scheme prepends
2774 the prefix "vect_" or "vect_p" (depending on the value of VAR_KIND)
2775 to NAME when NAME is provided; otherwise the prefix alone is used.  */
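/* For example (illustrative): with NAME "x", vect_simple_var yields a
   variable named "vect_x" and vect_pointer_var yields "vect_px"; when no
   NAME is given, the bare prefix is used.  */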
2779 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2786 case vect_simple_var:
2789 case vect_scalar_var:
2792 case vect_pointer_var:
2801 char* tmp = concat (prefix, name, NULL);
2802 new_vect_var = create_tmp_var (type, tmp);
2806 new_vect_var = create_tmp_var (type, prefix);
2808 /* Mark vector typed variable as a gimple register variable. */
2809 if (TREE_CODE (type) == VECTOR_TYPE)
2810 DECL_GIMPLE_REG_P (new_vect_var) = true;
2812 return new_vect_var;
2816 /* Function vect_create_addr_base_for_vector_ref.
2818 Create an expression that computes the address of the first memory location
2819 that will be accessed for a data reference.
2822 STMT: The statement containing the data reference.
2823 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2824 OFFSET: Optional. If supplied, it is added to the initial address.
2825 LOOP: Specifies relative to which loop nest the address should be computed.
2826 For example, when the dataref is in an inner-loop nested in an
2827 outer-loop that is now being vectorized, LOOP can be either the
2828 outer-loop, or the inner-loop. The first memory location accessed
2829 by the following dataref ('in' points to short):

       for (i = 0; i < N; i++)
         for (j = 0; j < M; j++)
           s += in[i+j];

   is computed as follows:
2836 if LOOP=i_loop: &in (relative to i_loop)
2837 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2840 1. Return an SSA_NAME whose value is the address of the memory location of
2841 the first vector of the data reference.
2842 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2843 these statement(s) which define the returned SSA_NAME.
2845 FORNOW: We are only handling array accesses with step 1. */
2848 vect_create_addr_base_for_vector_ref (gimple stmt,
2849 gimple_seq *new_stmt_list,
2853 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2854 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2855 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2857 tree data_ref_base_var;
2859 tree addr_base, addr_expr;
2861 gimple_seq seq = NULL;
2862 tree base_offset = unshare_expr (DR_OFFSET (dr));
2863 tree init = unshare_expr (DR_INIT (dr));
2865 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2866 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2869 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2871 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2873 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2875 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2876 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2877 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2881 base_name = build_fold_indirect_ref (data_ref_base);
2884 base_offset = ssize_int (0);
2885 init = ssize_int (0);
2886 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2889 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2890 add_referenced_var (data_ref_base_var);
2891 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2893 gimple_seq_add_seq (new_stmt_list, seq);
2895 /* Create base_offset */
2896 base_offset = size_binop (PLUS_EXPR,
2897 fold_convert (sizetype, base_offset),
2898 fold_convert (sizetype, init));
2899 dest = create_tmp_var (sizetype, "base_off");
2900 add_referenced_var (dest);
2901 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2902 gimple_seq_add_seq (new_stmt_list, seq);
2906 tree tmp = create_tmp_var (sizetype, "offset");
2908 add_referenced_var (tmp);
2909 offset = fold_build2 (MULT_EXPR, sizetype,
2910 fold_convert (sizetype, offset), step);
2911 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2912 base_offset, offset);
2913 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2914 gimple_seq_add_seq (new_stmt_list, seq);
2917 /* base + base_offset */
2919 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2920 data_ref_base, base_offset);
2923 addr_base = build1 (ADDR_EXPR,
2924 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2925 unshare_expr (DR_REF (dr)));
2928 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2929 base = get_base_address (DR_REF (dr));
2931 && TREE_CODE (base) == MEM_REF)
2933 = build_qualified_type (vect_ptr_type,
2934 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2936 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2937 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2938 get_name (base_name));
2939 add_referenced_var (addr_expr);
2940 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2941 gimple_seq_add_seq (new_stmt_list, seq);
2943 if (DR_PTR_INFO (dr)
2944 && TREE_CODE (vec_stmt) == SSA_NAME)
2946 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2949 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2950 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2954 if (vect_print_dump_info (REPORT_DETAILS))
2956 fprintf (vect_dump, "created ");
2957 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2964 /* Function vect_create_data_ref_ptr.
2966 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2967 location accessed in the loop by STMT, along with the def-use update
2968 chain to appropriately advance the pointer through the loop iterations.
2969 Also set aliasing information for the pointer. This pointer is used by
2970 the callers to this function to create a memory reference expression for
2971 vector load/store access.
2974 1. STMT: a stmt that references memory. Expected to be of the form
2975 GIMPLE_ASSIGN <name, data-ref> or
2976 GIMPLE_ASSIGN <data-ref, name>.
2977 2. AGGR_TYPE: the type of the reference, which should be either a vector or an array type.
2979 3. AT_LOOP: the loop where the vector memref is to be created.
2980 4. OFFSET (optional): an offset to be added to the initial address accessed
2981 by the data-ref in STMT.
2982 5. BSI: location where the new stmts are to be placed if there is no loop
2983 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2984 pointing to the initial address.
2987 1. Declare a new ptr to vector_type, and have it point to the base of the
2988 data reference (the initial address accessed by the data reference).
2989 For example, for vector of type V8HI, the following code is generated:
2992 ap = (v8hi *)initial_address;
2994 if OFFSET is not supplied:
2995 initial_address = &a[init];
2996 if OFFSET is supplied:
2997 initial_address = &a[init + OFFSET];
2999 Return the initial_address in INITIAL_ADDRESS.
3001 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3002 update the pointer in each iteration of the loop.
3004 Return the increment stmt that updates the pointer in PTR_INCR.
3006 3. Set INV_P to true if the access pattern of the data reference in the
3007 vectorized loop is invariant. Set it to false otherwise.
3009 4. Return the pointer. */
3012 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3013 tree offset, tree *initial_address,
3014 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3015 bool only_init, bool *inv_p)
3018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3019 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3020 struct loop *loop = NULL;
3021 bool nested_in_vect_loop = false;
3022 struct loop *containing_loop = NULL;
3027 gimple_seq new_stmt_list = NULL;
3031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3033 gimple_stmt_iterator incr_gsi;
3036 tree indx_before_incr, indx_after_incr;
3039 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3042 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3043 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3047 loop = LOOP_VINFO_LOOP (loop_vinfo);
3048 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3049 containing_loop = (gimple_bb (stmt))->loop_father;
3050 pe = loop_preheader_edge (loop);
3054 gcc_assert (bb_vinfo);
3059 /* Check the step (evolution) of the load in LOOP, and record
3060 whether it's invariant. */
3061 if (nested_in_vect_loop)
3062 step = STMT_VINFO_DR_STEP (stmt_info);
3064 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3066 if (tree_int_cst_compare (step, size_zero_node) == 0)
3070 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3072 /* Create an expression for the first address accessed by this load in LOOP.  */
3074 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3076 if (vect_print_dump_info (REPORT_DETAILS))
3078 tree data_ref_base = base_name;
3079 fprintf (vect_dump, "create %s-pointer variable to type: ",
3080 tree_code_name[(int) TREE_CODE (aggr_type)]);
3081 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3082 if (TREE_CODE (data_ref_base) == VAR_DECL
3083 || TREE_CODE (data_ref_base) == ARRAY_REF)
3084 fprintf (vect_dump, " vectorizing an array ref: ");
3085 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3086 fprintf (vect_dump, " vectorizing a record based array ref: ");
3087 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3088 fprintf (vect_dump, " vectorizing a pointer ref: ");
3089 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3092 /* (1) Create the new aggregate-pointer variable. */
3093 aggr_ptr_type = build_pointer_type (aggr_type);
3094 base = get_base_address (DR_REF (dr));
3096 && TREE_CODE (base) == MEM_REF)
3098 = build_qualified_type (aggr_ptr_type,
3099 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3100 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3101 get_name (base_name));
3103 /* Vector and array types inherit the alias set of their component
3104 type by default so we need to use a ref-all pointer if the data
3105 reference does not conflict with the created aggregated data
3106 reference because it is not addressable. */
3107 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3108 get_alias_set (DR_REF (dr))))
3111 = build_pointer_type_for_mode (aggr_type,
3112 TYPE_MODE (aggr_ptr_type), true);
3113 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3114 get_name (base_name));
3117 /* Likewise for any of the data references in the stmt group. */
3118 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3120 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3123 tree lhs = gimple_assign_lhs (orig_stmt);
3124 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3125 get_alias_set (lhs)))
3128 = build_pointer_type_for_mode (aggr_type,
3129 TYPE_MODE (aggr_ptr_type), true);
3131 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3132 get_name (base_name));
3136 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3141 add_referenced_var (aggr_ptr);
3143 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3144 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3145 def-use update cycles for the pointer: one relative to the outer-loop
3146 (LOOP), which is what steps (3) and (4) below do. The other is relative
3147 to the inner-loop (which is the inner-most loop containing the dataref),
3148 and this is done by step (5) below.
3150 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3151 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3152 redundant. Steps (3),(4) create the following:
3155 LOOP: vp1 = phi(vp0,vp2)
3161 If there is an inner-loop nested in loop, then step (5) will also be
3162 applied, and an additional update in the inner-loop will be created:
3165 LOOP: vp1 = phi(vp0,vp2)
3167 inner: vp3 = phi(vp1,vp4)
3168 vp4 = vp3 + inner_step
3174 /* (2) Calculate the initial address of the aggregate-pointer, and set
3175 the aggregate-pointer to point to it before the loop. */
3177 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3179 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3185 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3186 gcc_assert (!new_bb);
3189 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3192 *initial_address = new_temp;
3194 /* Create: p = (aggr_type *) initial_base */
3195 if (TREE_CODE (new_temp) != SSA_NAME
3196 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3198 vec_stmt = gimple_build_assign (aggr_ptr,
3199 fold_convert (aggr_ptr_type, new_temp));
3200 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3201 /* Copy the points-to information if it exists. */
3202 if (DR_PTR_INFO (dr))
3203 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3204 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3207 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3208 gcc_assert (!new_bb);
3211 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3214 aggr_ptr_init = new_temp;
3216 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3217 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3218 inner-loop nested in LOOP (during outer-loop vectorization). */
3220 /* No update in loop is required. */
3221 if (only_init && (!loop_vinfo || at_loop == loop))
3222 aptr = aggr_ptr_init;
3225 /* The step of the aggregate pointer is the type size. */
3226 tree step = TYPE_SIZE_UNIT (aggr_type);
3227 /* One exception to the above is when the scalar step of the load in
3228 LOOP is zero. In this case the step here is also zero. */
3230 step = size_zero_node;
3232 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3234 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3236 create_iv (aggr_ptr_init,
3237 fold_convert (aggr_ptr_type, step),
3238 aggr_ptr, loop, &incr_gsi, insert_after,
3239 &indx_before_incr, &indx_after_incr);
3240 incr = gsi_stmt (incr_gsi);
3241 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3243 /* Copy the points-to information if it exists. */
3244 if (DR_PTR_INFO (dr))
3246 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3247 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3252 aptr = indx_before_incr;
3255 if (!nested_in_vect_loop || only_init)
3259 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3260 nested in LOOP, if one exists. */
3262 gcc_assert (nested_in_vect_loop);
3265 standard_iv_increment_position (containing_loop, &incr_gsi,
3267 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3268 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3270 incr = gsi_stmt (incr_gsi);
3271 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3273 /* Copy the points-to information if it exists. */
3274 if (DR_PTR_INFO (dr))
3276 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3277 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3282 return indx_before_incr;
3289 /* Function bump_vector_ptr
3291 Increment a pointer (to a vector type) by vector-size. If requested,
3292 i.e. if PTR_INCR is given, then also connect the new increment stmt
3293 to the existing def-use update-chain of the pointer, by modifying
3294 the PTR_INCR as illustrated below:
3296 The pointer def-use update-chain before this function:
3297 DATAREF_PTR = phi (p_0, p_2)
3299 PTR_INCR: p_2 = DATAREF_PTR + step
3301 The pointer def-use update-chain after this function:
3302 DATAREF_PTR = phi (p_0, p_2)
3304 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3306 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3309 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3311 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3312 the loop. The increment amount across iterations is expected to be vector_size.
3314 BSI - location where the new update stmt is to be placed.
3315 STMT - the original scalar memory-access stmt that is being vectorized.
3316 BUMP - optional. The offset by which to bump the pointer. If not given,
3317 the offset is assumed to be vector_size.
3319 Output: Return NEW_DATAREF_PTR as illustrated above.
3324 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3325 gimple stmt, tree bump)
3327 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3328 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3329 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3330 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3331 tree update = TYPE_SIZE_UNIT (vectype);
3334 use_operand_p use_p;
3335 tree new_dataref_ptr;
3340 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3341 dataref_ptr, update);
3342 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3343 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3344 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3346 /* Copy the points-to information if it exists. */
3347 if (DR_PTR_INFO (dr))
3349 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3350 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3351 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3355 return new_dataref_ptr;
3357 /* Update the vector-pointer's cross-iteration increment. */
3358 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3360 tree use = USE_FROM_PTR (use_p);
3362 if (use == dataref_ptr)
3363 SET_USE (use_p, new_dataref_ptr);
3365 gcc_assert (tree_int_cst_compare (use, update) == 0);
3368 return new_dataref_ptr;
3372 /* Function vect_create_destination_var.
3374 Create a new temporary of type VECTYPE. */
3377 vect_create_destination_var (tree scalar_dest, tree vectype)
3380 const char *new_name;
3382 enum vect_var_kind kind;
3384 kind = vectype ? vect_simple_var : vect_scalar_var;
3385 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3387 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3389 new_name = get_name (scalar_dest);
3392 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3393 add_referenced_var (vec_dest);
3398 /* Function vect_strided_store_supported.
3400 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3401 and FALSE otherwise. */
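/* E.g. (illustrative): a store group of size 3 is rejected here, because
   vect_permute_store_chain below permutes in log2 (LENGTH) stages and so
   requires a power-of-two group size; a group of size 4 is accepted
   whenever the target implements the interleave optabs for the vector
   mode.  */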
3404 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3406 optab interleave_high_optab, interleave_low_optab;
3407 enum machine_mode mode;
3409 mode = TYPE_MODE (vectype);
3411 /* vect_permute_store_chain requires the group size to be a power of two. */
3412 if (exact_log2 (count) == -1)
3414 if (vect_print_dump_info (REPORT_DETAILS))
3415 fprintf (vect_dump, "the size of the group of strided accesses"
3416 " is not a power of 2");
3420 /* Check that the operation is supported. */
3421 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3422 vectype, optab_default);
3423 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3424 vectype, optab_default);
3425 if (!interleave_high_optab || !interleave_low_optab)
3427 if (vect_print_dump_info (REPORT_DETAILS))
3428 fprintf (vect_dump, "no optab for interleave.");
3432 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3433 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3435 if (vect_print_dump_info (REPORT_DETAILS))
3436 fprintf (vect_dump, "interleave op not supported by target.");
3444 /* Return TRUE if vec_store_lanes is available for COUNT vectors of type VECTYPE.  */
3448 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3450 return vect_lanes_optab_supported_p ("vec_store_lanes",
3451 vec_store_lanes_optab,
3456 /* Function vect_permute_store_chain.
3458 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3459 a power of 2, generate interleave_high/low stmts to reorder the data
3460 correctly for the stores. Return the final references for stores in RESULT_CHAIN.
3463 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3464 The input is 4 vectors each containing 8 elements. We assign a number to
3465 each element; the input sequence is:
3467 1st vec: 0 1 2 3 4 5 6 7
3468 2nd vec: 8 9 10 11 12 13 14 15
3469 3rd vec: 16 17 18 19 20 21 22 23
3470 4th vec: 24 25 26 27 28 29 30 31
3472 The output sequence should be:
3474 1st vec: 0 8 16 24 1 9 17 25
3475 2nd vec: 2 10 18 26 3 11 19 27
3476 3rd vec: 4 12 20 28 5 13 21 29
3477 4th vec: 6 14 22 30 7 15 23 31
3479 i.e., we interleave the contents of the four vectors in their order.
3481 We use interleave_high/low instructions to create such output. The input of
3482 each interleave_high/low operation is two vectors:

       1st vec    2nd vec
       0 1 2 3    4 5 6 7
3485 the even elements of the result vector are obtained left-to-right from the
3486 high/low elements of the first vector. The odd elements of the result are
3487 obtained left-to-right from the high/low elements of the second vector.
3488 The output of interleave_high will be: 0 4 1 5
3489 and of interleave_low: 2 6 3 7
3492 The permutation is done in log LENGTH stages. In each stage interleave_high
3493 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3494 where the first argument is taken from the first half of DR_CHAIN and the
3495 second argument from its second half.
3498 I1: interleave_high (1st vec, 3rd vec)
3499 I2: interleave_low (1st vec, 3rd vec)
3500 I3: interleave_high (2nd vec, 4th vec)
3501 I4: interleave_low (2nd vec, 4th vec)
3503 The output for the first stage is:
3505 I1: 0 16 1 17 2 18 3 19
3506 I2: 4 20 5 21 6 22 7 23
3507 I3: 8 24 9 25 10 26 11 27
3508 I4: 12 28 13 29 14 30 15 31
3510 The output of the second stage, i.e. the final result is:
3512 I1: 0 8 16 24 1 9 17 25
3513 I2: 2 10 18 26 3 11 19 27
3514 I3: 4 12 20 28 5 13 21 29
3515 I4: 6 14 22 30 7 15 23 31. */
3518 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3519 unsigned int length,
3521 gimple_stmt_iterator *gsi,
3522 VEC(tree,heap) **result_chain)
3524 tree perm_dest, vect1, vect2, high, low;
3526 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3529 enum tree_code high_code, low_code;
3531 gcc_assert (vect_strided_store_supported (vectype, length));
3533 *result_chain = VEC_copy (tree, heap, dr_chain);
3535 for (i = 0; i < exact_log2 (length); i++)
3537 for (j = 0; j < length/2; j++)
3539 vect1 = VEC_index (tree, dr_chain, j);
3540 vect2 = VEC_index (tree, dr_chain, j+length/2);
3542 /* Create interleaving stmt:
3543 in the case of big endian:
3544 high = interleave_high (vect1, vect2)
3545 and in the case of little endian:
3546 high = interleave_low (vect1, vect2). */
3547 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3548 DECL_GIMPLE_REG_P (perm_dest) = 1;
3549 add_referenced_var (perm_dest);
3550 if (BYTES_BIG_ENDIAN)
3552 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3553 low_code = VEC_INTERLEAVE_LOW_EXPR;
3557 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3558 high_code = VEC_INTERLEAVE_LOW_EXPR;
3560 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3562 high = make_ssa_name (perm_dest, perm_stmt);
3563 gimple_assign_set_lhs (perm_stmt, high);
3564 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3565 VEC_replace (tree, *result_chain, 2*j, high);
3567 /* Create interleaving stmt:
3568 in the case of big endian:
3569 low = interleave_low (vect1, vect2)
3570 and in the case of little endian:
3571 low = interleave_high (vect1, vect2). */
3572 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3573 DECL_GIMPLE_REG_P (perm_dest) = 1;
3574 add_referenced_var (perm_dest);
3575 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3577 low = make_ssa_name (perm_dest, perm_stmt);
3578 gimple_assign_set_lhs (perm_stmt, low);
3579 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3580 VEC_replace (tree, *result_chain, 2*j+1, low);
3582 dr_chain = VEC_copy (tree, heap, *result_chain);
3586 /* Function vect_setup_realignment
3588 This function is called when vectorizing an unaligned load using
3589 the dr_explicit_realign[_optimized] scheme.
3590 This function generates the following code at the loop prolog:
3593 x msq_init = *(floor(p)); # prolog load
3594 realignment_token = call target_builtin;
3596 x msq = phi (msq_init, ---)
3598 The stmts marked with x are generated only for the case of
3599 dr_explicit_realign_optimized.
3601 The code above sets up a new (vector) pointer, pointing to the first
3602 location accessed by STMT, and a "floor-aligned" load using that pointer.
3603 It also generates code to compute the "realignment-token" (if the relevant
3604 target hook was defined), and creates a phi-node at the loop-header bb
3605 whose arguments are the result of the prolog-load (created by this
3606 function) and the result of a load that takes place in the loop (to be
3607 created by the caller to this function).
3609 For the case of dr_explicit_realign_optimized:
3610 The caller to this function uses the phi-result (msq) to create the
3611 realignment code inside the loop, and sets up the missing phi argument,
3614 msq = phi (msq_init, lsq)
3615 lsq = *(floor(p')); # load in loop
3616 result = realign_load (msq, lsq, realignment_token);
3618 For the case of dr_explicit_realign:
3620 msq = *(floor(p)); # load in loop
3622 lsq = *(floor(p')); # load in loop
3623 result = realign_load (msq, lsq, realignment_token);
3626 STMT - (scalar) load stmt to be vectorized. This load accesses
3627 a memory location that may be unaligned.
3628 BSI - place where new code is to be inserted.
3629 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
   is used.

   Output:
3633 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3634 target hook, if defined.
3635 Return value - the result of the loop-header phi node. */
3638 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3639 tree *realignment_token,
3640 enum dr_alignment_support alignment_support_scheme,
3642 struct loop **at_loop)
3644 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3645 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3646 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3647 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3648 struct loop *loop = NULL;
3650 tree scalar_dest = gimple_assign_lhs (stmt);
3657 tree msq_init = NULL_TREE;
3660 tree msq = NULL_TREE;
3661 gimple_seq stmts = NULL;
3663 bool compute_in_loop = false;
3664 bool nested_in_vect_loop = false;
3665 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3666 struct loop *loop_for_initial_load = NULL;
3670 loop = LOOP_VINFO_LOOP (loop_vinfo);
3671 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3674 gcc_assert (alignment_support_scheme == dr_explicit_realign
3675 || alignment_support_scheme == dr_explicit_realign_optimized);
3677 /* We need to generate three things:
3678 1. the misalignment computation
3679 2. the extra vector load (for the optimized realignment scheme).
3680 3. the phi node for the two vectors from which the realignment is
3681 done (for the optimized realignment scheme). */
3683 /* 1. Determine where to generate the misalignment computation.
3685 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3686 calculation will be generated by this function, outside the loop (in the
3687 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3688 caller, inside the loop.
3690 Background: If the misalignment remains fixed throughout the iterations of
3691 the loop, then both realignment schemes are applicable, and also the
3692 misalignment computation can be done outside LOOP. This is because we are
3693 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3694 are a multiple of VS (the Vector Size), and therefore the misalignment in
3695 different vectorized LOOP iterations is always the same.
3696 The problem arises only if the memory access is in an inner-loop nested
3697 inside LOOP, which is now being vectorized using outer-loop vectorization.
3698 This is the only case when the misalignment of the memory access may not
3699 remain fixed throughout the iterations of the inner-loop (as explained in
3700 detail in vect_supportable_dr_alignment). In this case, not only is the
3701 optimized realignment scheme not applicable, but also the misalignment
3702 computation (and generation of the realignment token that is passed to
3703 REALIGN_LOAD) have to be done inside the loop.
3705 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3706 or not, which in turn determines if the misalignment is computed inside
3707 the inner-loop, or outside LOOP. */
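/* A small worked illustration (not from the original sources): with a
   16-byte vector size, a data-ref whose first access is at address 0x1004
   has misalignment 4; every vectorized LOOP iteration advances the
   address by a multiple of 16, so the misalignment stays 4 and can be
   computed once, before the loop.  If the same access sits in an
   inner-loop whose scalar step is, say, 2 bytes, the misalignment changes
   from one inner-loop iteration to the next and must be computed inside.  */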
3709 if (init_addr != NULL_TREE || !loop_vinfo)
3711 compute_in_loop = true;
3712 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3716 /* 2. Determine where to generate the extra vector load.
3718 For the optimized realignment scheme, instead of generating two vector
3719 loads in each iteration, we generate a single extra vector load in the
3720 preheader of the loop, and in each iteration reuse the result of the
3721 vector load from the previous iteration. In case the memory access is in
3722 an inner-loop nested inside LOOP, which is now being vectorized using
3723 outer-loop vectorization, we need to determine whether this initial vector
3724 load should be generated at the preheader of the inner-loop, or can be
3725 generated at the preheader of LOOP. If the memory access has no evolution
3726 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3727 to be generated inside LOOP (in the preheader of the inner-loop). */
3729 if (nested_in_vect_loop)
3731 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3732 bool invariant_in_outerloop =
3733 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3734 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3737 loop_for_initial_load = loop;
3739 *at_loop = loop_for_initial_load;
3741 if (loop_for_initial_load)
3742 pe = loop_preheader_edge (loop_for_initial_load);
3744 /* 3. For the case of the optimized realignment, create the first vector
3745 load at the loop preheader. */
3747 if (alignment_support_scheme == dr_explicit_realign_optimized)
3749 /* Create msq_init = *(floor(p1)) in the loop preheader */
3751 gcc_assert (!compute_in_loop);
3752 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3753 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3754 NULL_TREE, &init_addr, NULL, &inc,
3756 new_stmt = gimple_build_assign_with_ops
3757 (BIT_AND_EXPR, NULL_TREE, ptr,
3758 build_int_cst (TREE_TYPE (ptr),
3759 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3760 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3761 gimple_assign_set_lhs (new_stmt, new_temp);
3762 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3763 gcc_assert (!new_bb);
3765 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3766 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3767 new_stmt = gimple_build_assign (vec_dest, data_ref);
3768 new_temp = make_ssa_name (vec_dest, new_stmt);
3769 gimple_assign_set_lhs (new_stmt, new_temp);
3770 mark_symbols_for_renaming (new_stmt);
3773 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3774 gcc_assert (!new_bb);
3777 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3779 msq_init = gimple_assign_lhs (new_stmt);
3782 /* 4. Create realignment token using a target builtin, if available.
3783 It is done either inside the containing loop, or before LOOP (as
3784 determined above). */
3786 if (targetm.vectorize.builtin_mask_for_load)
3790 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3793 /* Generate the INIT_ADDR computation outside LOOP. */
3794 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3798 pe = loop_preheader_edge (loop);
3799 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3800 gcc_assert (!new_bb);
3803 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3806 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3807 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3809 vect_create_destination_var (scalar_dest,
3810 gimple_call_return_type (new_stmt));
3811 new_temp = make_ssa_name (vec_dest, new_stmt);
3812 gimple_call_set_lhs (new_stmt, new_temp);
3814 if (compute_in_loop)
3815 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3818 /* Generate the misalignment computation outside LOOP. */
3819 pe = loop_preheader_edge (loop);
3820 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3821 gcc_assert (!new_bb);
3824 *realignment_token = gimple_call_lhs (new_stmt);
3826 /* The result of the CALL_EXPR to this builtin is determined from
3827 the value of the parameter, and no global variables are touched,
3828 which makes the builtin a "const" function. Requiring the
3829 builtin to have the "const" attribute makes it unnecessary
3830 to call mark_call_clobbered. */
3831 gcc_assert (TREE_READONLY (builtin_decl));
3834 if (alignment_support_scheme == dr_explicit_realign)
3837 gcc_assert (!compute_in_loop);
3838 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3841 /* 5. Create msq = phi <msq_init, lsq> in loop */
3843 pe = loop_preheader_edge (containing_loop);
3844 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3845 msq = make_ssa_name (vec_dest, NULL);
3846 phi_stmt = create_phi_node (msq, containing_loop->header);
3847 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3848 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3854 /* Function vect_strided_load_supported.
3856 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3857 and FALSE otherwise. */
3860 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3862 optab perm_even_optab, perm_odd_optab;
3863 enum machine_mode mode;
3865 mode = TYPE_MODE (vectype);
3867 /* vect_permute_load_chain requires the group size to be a power of two. */
3868 if (exact_log2 (count) == -1)
3870 if (vect_print_dump_info (REPORT_DETAILS))
3871 fprintf (vect_dump, "the size of the group of strided accesses"
3872 " is not a power of 2");
3876 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3878 if (!perm_even_optab)
3880 if (vect_print_dump_info (REPORT_DETAILS))
3881 fprintf (vect_dump, "no optab for perm_even.");
3885 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3887 if (vect_print_dump_info (REPORT_DETAILS))
3888 fprintf (vect_dump, "perm_even op not supported by target.");
3892 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3894 if (!perm_odd_optab)
3896 if (vect_print_dump_info (REPORT_DETAILS))
3897 fprintf (vect_dump, "no optab for perm_odd.");
3901 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3903 if (vect_print_dump_info (REPORT_DETAILS))
3904 fprintf (vect_dump, "perm_odd op not supported by target.");
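/* For instance, a group of three interleaved loads is rejected up front,
   since exact_log2 (3) == -1, whereas a group of four is accepted as long
   as the target implements vec_extract_even/odd for the vector mode:

     if (vect_strided_load_supported (vectype, 4))
       ... generate the extract_even/odd permutation network ...  */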
/* Return TRUE if vec_load_lanes is available for COUNT vectors of
   type VECTYPE.  */

bool
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  return vect_lanes_optab_supported_p ("vec_load_lanes",
                                       vec_load_lanes_optab,
                                       vectype, count);
}
/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number
   to each element; the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0 4  8 12 16 20 24 28
   2nd vec:  1 5  9 13 17 21 25 29
   3rd vec:  2 6 10 14 18 22 26 30
   4th vec:  3 7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output
   of extract_even will be:  0 2 4 6
   and of extract_odd:       1 3 5 7

   The permutation is done in log LENGTH stages.  In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:  0  2  4  6  8 10 12 14
   E2:  1  3  5  7  9 11 13 15
   E3: 16 18 20 22 24 26 28 30
   E4: 17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of the extract_even operation and then
   the output of extract_odd in RESULT_CHAIN (which is then copied to
   DR_CHAIN).  The input for the second stage is:

   1st vec (E1):  0  2  4  6  8 10 12 14
   2nd vec (E3): 16 18 20 22 24 26 28 30
   3rd vec (E2):  1  3  5  7  9 11 13 15
   4th vec (E4): 17 19 21 23 25 27 29 31

   The output of the second stage:

   E1: 0 4  8 12 16 20 24 28
   E2: 2 6 10 14 18 22 26 30
   E3: 1 5  9 13 17 21 25 29
   E4: 3 7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1): 0 4  8 12 16 20 24 28
   2nd vec (E3): 1 5  9 13 17 21 25 29
   3rd vec (E2): 2 6 10 14 18 22 26 30
   4th vec (E4): 3 7 11 15 19 23 27 31.  */
static void
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
                         unsigned int length,
                         gimple stmt,
                         gimple_stmt_iterator *gsi,
                         VEC(tree,heap) **result_chain)
{
  tree perm_dest, data_ref, first_vect, second_vect;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  int i;
  unsigned int j;

  gcc_assert (vect_strided_load_supported (vectype, length));

  *result_chain = VEC_copy (tree, heap, dr_chain);
  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = VEC_index (tree, dr_chain, j);
          second_vect = VEC_index (tree, dr_chain, j+1);

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2, data_ref);

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
}
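/* A minimal scalar model of the permutation network above (illustrative
   only; a "vector" is modelled as an array of VS ints, the chain as
   CHAIN[LENGTH][VS], and LENGTH is assumed to be a power of two):

     #include <string.h>

     void
     model_permute_load_chain (unsigned int length, unsigned int vs,
                               int chain[length][vs])
     {
       int result[length][vs];
       unsigned int s, stages, j, k;

       for (stages = 0; (1u << stages) < length; stages++)
         ;   // stages = log2 (length)

       for (s = 0; s < stages; s++)
         {
           for (j = 0; j < length; j += 2)
             for (k = 0; k < vs; k++)
               {
                 // extract_even of the pair (chain[j], chain[j+1]).
                 result[j / 2][k]
                   = (2 * k < vs
                      ? chain[j][2 * k] : chain[j + 1][2 * k - vs]);
                 // extract_odd of the same pair.
                 result[j / 2 + length / 2][k]
                   = (2 * k + 1 < vs
                      ? chain[j][2 * k + 1] : chain[j + 1][2 * k + 1 - vs]);
               }
           memcpy (chain, result, sizeof (result));
         }
     }

   Running it on the example above (length 4, vs 8, chain[i][k] = 8*i + k)
   reproduces RESULT_CHAIN exactly: {0 4 8 ... 28}, {1 5 9 ... 29},
   {2 6 10 ... 30}, {3 7 11 ... 31}.  */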
/* Function vect_transform_strided_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build
   statements to perform their permutation and ascribe the result
   vectorized statements to the scalar statements.  */

void
vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
                             gimple_stmt_iterator *gsi)
{
  VEC(tree,heap) *result_chain = NULL;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains
     permuted vectors that are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);
  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
  vect_record_strided_load_vectors (stmt, result_chain);
  VEC_free (tree, heap, result_chain);
}
/* RESULT_CHAIN contains the output of a group of strided loads that were
   generated as part of the vectorization of STMT.  Assign the statement
   for each vector to the associated scalar statement.  */

void
vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  gimple next_stmt, new_stmt;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by the
         dead code elimination pass later.  No need to check for the first
         stmt in the group, since it always exists.
         GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (next_stmt != first_stmt
          && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of
             multiple copies, and we put the new vector statement in the
             first available RELATED_STMT.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
                {
                  gimple prev_stmt =
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
                  gimple rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
                  while (rel_stmt)
                    {
                      prev_stmt = rel_stmt;
                      rel_stmt =
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                    }

                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
                    new_stmt;
                }
            }

          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }
}
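/* For example (a sketch): if the scalar group accesses a[4i] and a[4i+2],
   with gaps at offsets 1 and 3, GROUP_GAP of the second statement is 2, so
   one vector of RESULT_CHAIN is skipped between the vectors recorded for
   the two statements; the loads generated for the gaps are left for DCE to
   remove, as the comment above explains.  */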
/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on ALIGNMENT bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  if (DECL_EXTERNAL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}
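/* E.g. (a sketch): for a function-local declaration

     short buf[8];

   the vectorizer may raise DECL_ALIGN (buf) as far as MAX_STACK_ALIGNMENT
   bits so that vector accesses to BUF become aligned, whereas an 'extern'
   declaration, or a variable whose assembler output has already been
   written, cannot be re-aligned after the fact.  */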
/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
                               bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }
  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout
     the execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same
     throughout the execution of the inner-loop.  This is because the
     inner-loop advances with the original scalar step (and not in steps of
     VS).  If the inner-loop step happens to be a multiple of VS, then the
     misalignment remains fixed and we can use the optimized realignment
     scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      } */
  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if ((nested_in_vect_loop
               && (TREE_INT_CST_LOW (DR_STEP (dr))
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
              || !loop_vinfo)
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }
      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
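/* A typical caller-side pattern (a sketch; the actual logic lives in
   vectorizable_load/vectorizable_store and in the alignment-enhancement
   code earlier in this file):

     switch (vect_supportable_dr_alignment (dr, false))
       {
       case dr_aligned:
       case dr_unaligned_supported:
         // Emit a plain (possibly misaligned) vector access.
         break;
       case dr_explicit_realign:
       case dr_explicit_realign_optimized:
         // Emit aligned loads plus a REALIGN_LOAD (loads only).
         break;
       default:
         // dr_unaligned_unsupported: this access cannot be vectorized
         // without versioning or peeling for alignment.
         break;
       }  */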