1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
61 if (array_mode == BLKmode)
63 if (vect_print_dump_info (REPORT_DETAILS))
64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
71 if (vect_print_dump_info (REPORT_DETAILS))
72 fprintf (vect_dump, "cannot use %s<%s><%s>",
73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
77 if (vect_print_dump_info (REPORT_DETAILS))
78 fprintf (vect_dump, "can use %s<%s><%s>",
79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
85 /* Return the smallest scalar part of STMT.
86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
88 result-type is different from the type of the arguments (e.g., demotion,
89 promotion), vectype will be reset appropriately (later). Note that we have
90 to visit the smallest datatype in this function, because that determines the
91 VF. If the smallest datatype in the loop is present only as the rhs of a
92 promotion operation, we would miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant, e.g.
95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104 HOST_WIDE_INT *rhs_size_unit)
106 tree scalar_type = gimple_expr_type (stmt);
107 HOST_WIDE_INT lhs, rhs;
109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
111 if (is_gimple_assign (stmt)
112 && (gimple_assign_cast_p (stmt)
113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
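/* Illustrative sketch (added for exposition; the variables s and p are
   hypothetical): in a loop such as

     short *s; int *p;
     for (i = 0; i < N; i++)
       p[i] = (int) s[i];

   the result type of the assignment is int, but the smallest scalar type
   involved is short, and it is the short accesses that determine the
   vectorization factor.  */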
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
141 while (next_stmt && next_stmt != stmt)
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
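/* For illustration (a sketch): if the chain of DRB already holds accesses
   with DR_INIT values 0 and 8, inserting a DRA whose DR_INIT is 4 produces
   the chain ordered by init: 0, 4, 8.  */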
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are part of a chain of interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
196 FIRST_DR = DRB
197 NEXT(DRB) = DRA
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
204 FIRST_DR = DRB
205 NEXT(FIRST_DR) = previous FIRST_DR
206 else
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of the DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain
286 (don't start from the beginning).  */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
293 /* Insert the nodes of the DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain
295 (don't start from the beginning).  */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
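/* Example of case 4 (a sketch with a hypothetical array a): chain
   A = {a[1], a[3]} and chain B = {a[0], a[2]}.  B's FIRST_DR has the
   smaller init (0), so the nodes of A are merged into B, yielding
   {a[0], a[1], a[2], a[3]}, with every node's GROUP_FIRST_ELEMENT
   pointing to a[0].  */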
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share the same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
344 /* We only call this function for pairs of loads and stores, but we verify it here.  */
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the accesses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
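/* For example (a sketch; a, x and y are hypothetical): in a basic block
   containing

     x = a[4];
     a[4] = y;

   the load precedes the store, so vectorization preserves the order of
   the accesses and the pair is not treated as a blocking dependence;
   with the store first, a dependence would be reported.  */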
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between their inits.  */
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
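/* Illustrative example (a sketch): for the accesses a[2*i] and a[2*i+1]
   with 4-byte elements, both steps are 8, the init difference is 4,
   4 % 4 == 0 and 4 <= 8, so the two data-refs are recorded as an
   interleaving group of size 2.  */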
474 /* Check if the data references pointed to by DR_I and DR_J are the same or
475 belong to the same interleaving group. Return FALSE if the drs are
476 different, otherwise return TRUE. */
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
509 /* Insert DDR into the LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
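/* Conceptually (a sketch of the kind of guard that the versioning code
   emits later; the names a, b and N are illustrative), each recorded ddr
   leads to a run-time check of the form

     if (&a[0] + N * step_a <= &b[0] || &b[0] + N * step_b <= &a[0])
       ... vectorized loop ...
     else
       ... scalar loop ...

   so the vectorized version runs only when the two ranges do not
   overlap.  */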
549 /* Function vect_analyze_data_ref_dependence.
551 Return TRUE if there (might) exist a dependence between a memory-reference
552 DRA and a memory-reference DRB. When the dependence can instead be checked
553 at run-time by versioning for alias, return FALSE. Adjust *MAX_VF according to
554 the data dependence. */
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558 loop_vec_info loop_vinfo, int *max_vf,
559 bool *data_dependence_in_bb)
562 struct loop *loop = NULL;
563 struct data_reference *dra = DDR_A (ddr);
564 struct data_reference *drb = DDR_B (ddr);
565 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
566 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
567 lambda_vector dist_v;
568 unsigned int loop_depth;
570 /* Don't bother to analyze statements marked as unvectorizable. */
571 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
572 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
575 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
577 /* Independent data accesses. */
578 vect_check_interleaving (dra, drb);
583 loop = LOOP_VINFO_LOOP (loop_vinfo);
585 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
588 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
594 fprintf (vect_dump, "versioning for alias required: "
595 "can't determine dependence between ");
596 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
597 fprintf (vect_dump, " and ");
598 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
601 /* Add to list of ddrs that need to be tested at run-time. */
602 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
605 /* When vectorizing a basic block, an unknown dependence can still mean a strided access.  */
607 if (vect_check_interleaving (dra, drb))
610 if (vect_print_dump_info (REPORT_DR_DETAILS))
612 fprintf (vect_dump, "can't determine dependence between ");
613 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
614 fprintf (vect_dump, " and ");
615 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
618 /* We do not vectorize basic blocks with write-write dependencies. */
619 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
622 /* We deal with read-write dependencies in basic blocks later (by
623 verifying that all the loads in the basic block are before all the stores).  */
625 *data_dependence_in_bb = true;
629 /* Versioning for alias is not yet supported for basic block SLP, and
630 dependence distance is not applicable, hence, in the case of a known data
631 dependence, basic block vectorization is impossible for now. */
634 if (dra != drb && vect_check_interleaving (dra, drb))
637 if (vect_print_dump_info (REPORT_DR_DETAILS))
639 fprintf (vect_dump, "determined dependence between ");
640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
645 /* Do not vectorize basic blocks with write-write dependences. */
646 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
649 /* Check if this dependence is allowed in basic block vectorization. */
650 return vect_drs_dependent_in_basic_block (dra, drb);
653 /* Loop-based vectorization and known data dependence. */
654 if (DDR_NUM_DIST_VECTS (ddr) == 0)
656 if (vect_print_dump_info (REPORT_DR_DETAILS))
658 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
659 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
660 fprintf (vect_dump, " and ");
661 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
663 /* Add to list of ddrs that need to be tested at run-time. */
664 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
667 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
668 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
670 int dist = dist_v[loop_depth];
672 if (vect_print_dump_info (REPORT_DR_DETAILS))
673 fprintf (vect_dump, "dependence distance = %d.", dist);
677 if (vect_print_dump_info (REPORT_DR_DETAILS))
679 fprintf (vect_dump, "dependence distance == 0 between ");
680 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
681 fprintf (vect_dump, " and ");
682 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
685 /* For interleaving, mark that there is a read-write dependency if
686 necessary. We have already checked that one of the data-refs is a store. */
687 if (DR_IS_READ (dra))
688 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
691 if (DR_IS_READ (drb))
692 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
698 if (dist > 0 && DDR_REVERSED_P (ddr))
700 /* If DDR_REVERSED_P the order of the data-refs in DDR was
701 reversed (to make distance vector positive), and the actual
702 distance is negative. */
703 if (vect_print_dump_info (REPORT_DR_DETAILS))
704 fprintf (vect_dump, "dependence distance negative.");
709 && abs (dist) < *max_vf)
711 /* The dependence distance requires reduction of the maximal
712 vectorization factor. */
713 *max_vf = abs (dist);
714 if (vect_print_dump_info (REPORT_DR_DETAILS))
715 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
719 if (abs (dist) >= *max_vf)
721 /* Dependence distance does not create dependence, as far as
722 vectorization is concerned, in this case. */
723 if (vect_print_dump_info (REPORT_DR_DETAILS))
724 fprintf (vect_dump, "dependence distance >= VF.");
728 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
730 fprintf (vect_dump, "not vectorized, possible dependence "
731 "between data-refs ");
732 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
733 fprintf (vect_dump, " and ");
734 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
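/* Distance example (a sketch with a hypothetical array a): in

     for (i = 0; i < N; i++)
       a[i + 3] = a[i] + 1;

   the dependence distance is 3, so *max_vf is reduced to 3; a
   vectorization factor of at most 3 preserves the dependence, while a
   larger one would not.  */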
743 /* Function vect_analyze_data_ref_dependences.
745 Examine all the data references in the loop, and make sure there do not
746 exist any data dependences between them. Set *MAX_VF according to
747 the maximum vectorization factor the data dependences allow. */
750 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
751 bb_vec_info bb_vinfo, int *max_vf,
752 bool *data_dependence_in_bb)
755 VEC (ddr_p, heap) *ddrs = NULL;
756 struct data_dependence_relation *ddr;
758 if (vect_print_dump_info (REPORT_DETAILS))
759 fprintf (vect_dump, "=== vect_analyze_dependences ===");
762 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
764 ddrs = BB_VINFO_DDRS (bb_vinfo);
766 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
767 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
768 data_dependence_in_bb))
775 /* Function vect_compute_data_ref_alignment
777 Compute the misalignment of the data reference DR.
780 1. If during the misalignment computation it is found that the data reference
781 cannot be vectorized then false is returned.
782 2. DR_MISALIGNMENT (DR) is defined.
784 FOR NOW: No analysis is actually performed. Misalignment is calculated
785 only for trivial cases. TODO. */
788 vect_compute_data_ref_alignment (struct data_reference *dr)
790 gimple stmt = DR_STMT (dr);
791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
792 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
793 struct loop *loop = NULL;
794 tree ref = DR_REF (dr);
796 tree base, base_addr;
799 tree aligned_to, alignment;
801 if (vect_print_dump_info (REPORT_DETAILS))
802 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
805 loop = LOOP_VINFO_LOOP (loop_vinfo);
807 /* Initialize misalignment to unknown. */
808 SET_DR_MISALIGNMENT (dr, -1);
810 misalign = DR_INIT (dr);
811 aligned_to = DR_ALIGNED_TO (dr);
812 base_addr = DR_BASE_ADDRESS (dr);
813 vectype = STMT_VINFO_VECTYPE (stmt_info);
815 /* In case the dataref is in an inner-loop of the loop that is being
816 vectorized (LOOP), we use the base and misalignment information
817 relative to the outer-loop (LOOP). This is ok only if the misalignment
818 stays the same throughout the execution of the inner-loop, which is why
819 we have to check that the stride of the dataref in the inner-loop is
820 a multiple of the vector size. */
821 if (loop && nested_in_vect_loop_p (loop, stmt))
823 tree step = DR_STEP (dr);
824 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
826 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
828 if (vect_print_dump_info (REPORT_ALIGNMENT))
829 fprintf (vect_dump, "inner step divides the vector-size.");
830 misalign = STMT_VINFO_DR_INIT (stmt_info);
831 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
832 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
836 if (vect_print_dump_info (REPORT_ALIGNMENT))
837 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
838 misalign = NULL_TREE;
842 base = build_fold_indirect_ref (base_addr);
843 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
845 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
848 if (vect_print_dump_info (REPORT_ALIGNMENT))
850 fprintf (vect_dump, "Unknown alignment for access: ");
851 print_generic_expr (vect_dump, base, TDF_SLIM);
857 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
859 || (TREE_CODE (base_addr) == SSA_NAME
860 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
861 TREE_TYPE (base_addr)))),
863 || (get_pointer_alignment (base_addr, TYPE_ALIGN (vectype))
864 >= TYPE_ALIGN (vectype)))
867 base_aligned = false;
871 /* Do not change the alignment of global variables if
872 flag_section_anchors is enabled. */
873 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
874 || (TREE_STATIC (base) && flag_section_anchors))
876 if (vect_print_dump_info (REPORT_DETAILS))
878 fprintf (vect_dump, "can't force alignment of ref: ");
879 print_generic_expr (vect_dump, ref, TDF_SLIM);
884 /* Force the alignment of the decl.
885 NOTE: This is the only change to the code we make during
886 the analysis phase, before deciding to vectorize the loop. */
887 if (vect_print_dump_info (REPORT_DETAILS))
889 fprintf (vect_dump, "force alignment of ");
890 print_generic_expr (vect_dump, ref, TDF_SLIM);
893 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
894 DECL_USER_ALIGN (base) = 1;
897 /* At this point we assume that the base is aligned. */
898 gcc_assert (base_aligned
899 || (TREE_CODE (base) == VAR_DECL
900 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
902 /* If this is a backward running DR then the first access in the larger
903 vectype is actually N-1 elements before the address in the DR.
904 Adjust misalign accordingly. */
905 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
907 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
908 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
909 otherwise we wouldn't be here. */
910 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
911 /* PLUS because DR_STEP was negative. */
912 misalign = size_binop (PLUS_EXPR, misalign, offset);
915 /* Modulo alignment. */
916 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
918 if (!host_integerp (misalign, 1))
920 /* Negative or overflowed misalignment value. */
921 if (vect_print_dump_info (REPORT_DETAILS))
922 fprintf (vect_dump, "unexpected misalign value");
926 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
928 if (vect_print_dump_info (REPORT_DETAILS))
930 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
931 print_generic_expr (vect_dump, ref, TDF_SLIM);
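/* Worked example (a sketch): for a 16-byte vector type, a base address
   known to be 16-byte aligned, and DR_INIT equal to 4, the computation
   gives misalign = 4 % 16 = 4, so DR_MISALIGNMENT (dr) becomes 4; if the
   base alignment cannot be established, it stays -1 (unknown).  */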
938 /* Function vect_compute_data_refs_alignment
940 Compute the misalignment of data references in the loop.
941 Return FALSE if a data reference is found that cannot be vectorized. */
944 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
945 bb_vec_info bb_vinfo)
947 VEC (data_reference_p, heap) *datarefs;
948 struct data_reference *dr;
952 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
954 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
956 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
957 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
958 && !vect_compute_data_ref_alignment (dr))
962 /* Mark unsupported statement as unvectorizable. */
963 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
974 /* Function vect_update_misalignment_for_peel
976 DR - the data reference whose misalignment is to be adjusted.
977 DR_PEEL - the data reference whose misalignment is being made
978 zero in the vector loop by the peel.
979 NPEEL - the number of iterations in the peel loop if the misalignment
980 of DR_PEEL is known at compile time. */
983 vect_update_misalignment_for_peel (struct data_reference *dr,
984 struct data_reference *dr_peel, int npeel)
987 VEC(dr_p,heap) *same_align_drs;
988 struct data_reference *current_dr;
989 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
990 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
991 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
992 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
994 /* For interleaved data accesses the step in the loop must be multiplied by
995 the size of the interleaving group. */
996 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
997 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
998 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
999 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1001 /* It can be assumed that the data refs with the same alignment as dr_peel
1002 are aligned in the vector loop. */
1004 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1005 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1007 if (current_dr != dr)
1009 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1010 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1011 SET_DR_MISALIGNMENT (dr, 0);
1015 if (known_alignment_for_access_p (dr)
1016 && known_alignment_for_access_p (dr_peel))
1018 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1019 int misal = DR_MISALIGNMENT (dr);
1020 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1021 misal += negative ? -npeel * dr_size : npeel * dr_size;
1022 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1023 SET_DR_MISALIGNMENT (dr, misal);
1027 if (vect_print_dump_info (REPORT_DETAILS))
1028 fprintf (vect_dump, "Setting misalignment to -1.");
1029 SET_DR_MISALIGNMENT (dr, -1);
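/* For instance (a sketch): with 4-byte elements, a 16-byte vector and
   npeel = 3, a data-ref with misalignment 4 and a positive step becomes
   (4 + 3 * 4) & 15 = 0, i.e. aligned after the peel; when either
   misalignment is unknown, it is reset to -1 as above.  */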
1033 /* Function vect_verify_datarefs_alignment
1035 Return TRUE if all data references in the loop can be
1036 handled with respect to alignment. */
1039 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1041 VEC (data_reference_p, heap) *datarefs;
1042 struct data_reference *dr;
1043 enum dr_alignment_support supportable_dr_alignment;
1047 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1049 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1051 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1053 gimple stmt = DR_STMT (dr);
1054 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1056 /* For interleaving, only the alignment of the first access matters.
1057 Skip statements marked as not vectorizable. */
1058 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1059 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1060 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1063 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1064 if (!supportable_dr_alignment)
1066 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1068 if (DR_IS_READ (dr))
1070 "not vectorized: unsupported unaligned load.");
1073 "not vectorized: unsupported unaligned store.");
1075 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1079 if (supportable_dr_alignment != dr_aligned
1080 && vect_print_dump_info (REPORT_ALIGNMENT))
1081 fprintf (vect_dump, "Vectorizing an unaligned access.");
1087 /* Function vector_alignment_reachable_p
1089 Return true if vector alignment for DR is reachable by peeling
1090 a few loop iterations. Return false otherwise. */
1093 vector_alignment_reachable_p (struct data_reference *dr)
1095 gimple stmt = DR_STMT (dr);
1096 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1097 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1099 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1101 /* For interleaved accesses we peel only if the number of iterations in
1102 the prolog loop ({VF - misalignment}) is a multiple of the
1103 number of interleaved accesses. */
1104 int elem_size, mis_in_elements;
1105 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1107 /* FORNOW: handle only known alignment. */
1108 if (!known_alignment_for_access_p (dr))
1111 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1112 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1114 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1118 /* If the misalignment is known at compile time then allow peeling
1119 only if natural alignment is reachable through peeling. */
1120 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1122 HOST_WIDE_INT elmsize =
1123 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1124 if (vect_print_dump_info (REPORT_DETAILS))
1126 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1127 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1129 if (DR_MISALIGNMENT (dr) % elmsize)
1131 if (vect_print_dump_info (REPORT_DETAILS))
1132 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1137 if (!known_alignment_for_access_p (dr))
1139 tree type = (TREE_TYPE (DR_REF (dr)));
1140 tree ba = DR_BASE_OBJECT (dr);
1141 bool is_packed = false;
1144 is_packed = contains_packed_reference (ba);
1146 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1149 if (vect_print_dump_info (REPORT_DETAILS))
1150 fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
1151 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
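/* Example (a sketch): for a 16-byte vector of 4-byte elements and a
   DR_MISALIGNMENT of 8 bytes, mis_in_elements is 2 and peeling
   4 - 2 = 2 iterations reaches alignment; for an interleaving group of
   size 3, however, 2 % 3 != 0, so peeling cannot align the group and
   false is returned.  */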
1161 /* Calculate the cost of the memory access represented by DR. */
1164 vect_get_data_access_cost (struct data_reference *dr,
1165 unsigned int *inside_cost,
1166 unsigned int *outside_cost)
1168 gimple stmt = DR_STMT (dr);
1169 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1170 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1171 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1172 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1173 int ncopies = vf / nunits;
1174 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1176 if (!supportable_dr_alignment)
1177 *inside_cost = VECT_MAX_COST;
1180 if (DR_IS_READ (dr))
1181 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1183 vect_get_store_cost (dr, ncopies, inside_cost);
1186 if (vect_print_dump_info (REPORT_COST))
1187 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1188 "outside_cost = %d.", *inside_cost, *outside_cost);
1193 vect_peeling_hash (const void *elem)
1195 const struct _vect_peel_info *peel_info;
1197 peel_info = (const struct _vect_peel_info *) elem;
1198 return (hashval_t) peel_info->npeel;
1203 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1205 const struct _vect_peel_info *a, *b;
1207 a = (const struct _vect_peel_info *) elem1;
1208 b = (const struct _vect_peel_info *) elem2;
1209 return (a->npeel == b->npeel);
1213 /* Insert DR into peeling hash table with NPEEL as key. */
1216 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1219 struct _vect_peel_info elem, *slot;
1221 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1224 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1230 slot = XNEW (struct _vect_peel_info);
1231 slot->npeel = npeel;
1234 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1239 if (!supportable_dr_alignment && !flag_vect_cost_model)
1240 slot->count += VECT_MAX_COST;
1244 /* Traverse the peeling hash table to find the peeling option that aligns
1245 the maximum number of data accesses. */
1248 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1250 vect_peel_info elem = (vect_peel_info) *slot;
1251 vect_peel_extended_info max = (vect_peel_extended_info) data;
1253 if (elem->count > max->peel_info.count
1254 || (elem->count == max->peel_info.count
1255 && max->peel_info.npeel > elem->npeel))
1257 max->peel_info.npeel = elem->npeel;
1258 max->peel_info.count = elem->count;
1259 max->peel_info.dr = elem->dr;
1266 /* Traverse peeling hash table and calculate cost for each peeling option.
1267 Find the one with the lowest cost. */
1270 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1272 vect_peel_info elem = (vect_peel_info) *slot;
1273 vect_peel_extended_info min = (vect_peel_extended_info) data;
1274 int save_misalignment, dummy;
1275 unsigned int inside_cost = 0, outside_cost = 0, i;
1276 gimple stmt = DR_STMT (elem->dr);
1277 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1278 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1279 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1280 struct data_reference *dr;
1282 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1284 stmt = DR_STMT (dr);
1285 stmt_info = vinfo_for_stmt (stmt);
1286 /* For interleaving, only the alignment of the first access matters.  */
1288 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1289 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1292 save_misalignment = DR_MISALIGNMENT (dr);
1293 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1294 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1295 SET_DR_MISALIGNMENT (dr, save_misalignment);
1298 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1299 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1301 if (inside_cost < min->inside_cost
1302 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1304 min->inside_cost = inside_cost;
1305 min->outside_cost = outside_cost;
1306 min->peel_info.dr = elem->dr;
1307 min->peel_info.npeel = elem->npeel;
1314 /* Choose best peeling option by traversing peeling hash table and either
1315 choosing an option with the lowest cost (if cost model is enabled) or the
1316 option that aligns as many accesses as possible. */
1318 static struct data_reference *
1319 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1320 unsigned int *npeel)
1322 struct _vect_peel_extended_info res;
1324 res.peel_info.dr = NULL;
1326 if (flag_vect_cost_model)
1328 res.inside_cost = INT_MAX;
1329 res.outside_cost = INT_MAX;
1330 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1331 vect_peeling_hash_get_lowest_cost, &res);
1335 res.peel_info.count = 0;
1336 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1337 vect_peeling_hash_get_most_frequent, &res);
1340 *npeel = res.peel_info.npeel;
1341 return res.peel_info.dr;
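/* For example (a sketch): if the hash table contains npeel = 2 covering
   3 data-refs and npeel = 6 covering 1, the most-frequent traversal
   selects npeel = 2; with the cost model enabled, the lowest-cost
   traversal may pick a different npeel if its inside/outside costs are
   smaller.  */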
1345 /* Function vect_enhance_data_refs_alignment
1347 This pass will use loop versioning and loop peeling in order to enhance
1348 the alignment of data references in the loop.
1350 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1351 original loop is to be vectorized. Any other loops that are created by
1352 the transformations performed in this pass - are not supposed to be
1353 vectorized. This restriction will be relaxed.
1355 This pass will require a cost model to guide it on whether to apply peeling
1356 or versioning or a combination of the two. For example, the scheme that
1357 Intel uses when given a loop with several memory accesses is as follows:
1358 choose one memory access ('p') whose alignment you want to force by
1359 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1360 other accesses are not necessarily aligned, or (2) use loop versioning to
1361 generate one loop in which all accesses are aligned, and another loop in
1362 which only 'p' is necessarily aligned.
1364 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1365 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1366 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1368 Devising a cost model is the most critical aspect of this work. It will
1369 guide us on which access to peel for, whether to use loop versioning, how
1370 many versions to create, etc. The cost model will probably consist of
1371 generic considerations as well as target specific considerations (on
1372 powerpc for example, misaligned stores are more painful than misaligned loads).
1375 Here are the general steps involved in alignment enhancements:
1377 -- original loop, before alignment analysis:
1378 for (i=0; i<N; i++){
1379 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1380 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1383 -- After vect_compute_data_refs_alignment:
1384 for (i=0; i<N; i++){
1385 x = q[i]; # DR_MISALIGNMENT(q) = 3
1386 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1389 -- Possibility 1: we do loop versioning:
1391 for (i=0; i<N; i++){ # loop 1A
1392 x = q[i]; # DR_MISALIGNMENT(q) = 3
1393 p[i] = y; # DR_MISALIGNMENT(p) = 0
1397 for (i=0; i<N; i++){ # loop 1B
1398 x = q[i]; # DR_MISALIGNMENT(q) = 3
1399 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1403 -- Possibility 2: we do loop peeling:
1404 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1408 for (i = 3; i < N; i++){ # loop 2A
1409 x = q[i]; # DR_MISALIGNMENT(q) = 0
1410 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1413 -- Possibility 3: combination of loop peeling and versioning:
1414 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1419 for (i = 3; i<N; i++){ # loop 3A
1420 x = q[i]; # DR_MISALIGNMENT(q) = 0
1421 p[i] = y; # DR_MISALIGNMENT(p) = 0
1425 for (i = 3; i<N; i++){ # loop 3B
1426 x = q[i]; # DR_MISALIGNMENT(q) = 0
1427 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1431 These loops are later passed to loop_transform to be vectorized. The
1432 vectorizer will use the alignment information to guide the transformation
1433 (whether to generate regular loads/stores, or with special handling for
1437 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1439 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1440 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1441 enum dr_alignment_support supportable_dr_alignment;
1442 struct data_reference *dr0 = NULL, *first_store = NULL;
1443 struct data_reference *dr;
1445 bool do_peeling = false;
1446 bool do_versioning = false;
1449 stmt_vec_info stmt_info;
1450 int vect_versioning_for_alias_required;
1451 unsigned int npeel = 0;
1452 bool all_misalignments_unknown = true;
1453 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1454 unsigned possible_npeel_number = 1;
1456 unsigned int nelements, mis, same_align_drs_max = 0;
1458 if (vect_print_dump_info (REPORT_DETAILS))
1459 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1461 /* While cost model enhancements are expected in the future, the high level
1462 view of the code at this time is as follows:
1464 A) If there is a misaligned access then see if peeling to align
1465 this access can make all data references satisfy
1466 vect_supportable_dr_alignment. If so, update data structures
1467 as needed and return true.
1469 B) If peeling wasn't possible and there is a data reference with an
1470 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1471 then see if loop versioning checks can be used to make all data
1472 references satisfy vect_supportable_dr_alignment. If so, update
1473 data structures as needed and return true.
1475 C) If neither peeling nor versioning were successful then return false if
1476 any data reference does not satisfy vect_supportable_dr_alignment.
1478 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1480 Note, Possibility 3 above (which is peeling and versioning together) is not
1481 being done at this time. */
1483 /* (1) Peeling to force alignment. */
1485 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1487 + How many accesses will become aligned due to the peeling
1488 - How many accesses will become unaligned due to the peeling,
1489 and the cost of misaligned accesses.
1490 - The cost of peeling (the extra runtime checks, the increase in code size).  */
1493 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1495 stmt = DR_STMT (dr);
1496 stmt_info = vinfo_for_stmt (stmt);
1498 if (!STMT_VINFO_RELEVANT (stmt_info))
1501 /* For interleaving, only the alignment of the first access matters.  */
1503 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1504 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1507 /* For invariant accesses there is nothing to enhance. */
1508 if (integer_zerop (DR_STEP (dr)))
1511 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1512 do_peeling = vector_alignment_reachable_p (dr);
1515 if (known_alignment_for_access_p (dr))
1517 unsigned int npeel_tmp;
1518 bool negative = tree_int_cst_compare (DR_STEP (dr),
1519 size_zero_node) < 0;
1521 /* Save info about DR in the hash table. */
1522 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1523 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1524 htab_create (1, vect_peeling_hash,
1525 vect_peeling_hash_eq, free);
1527 vectype = STMT_VINFO_VECTYPE (stmt_info);
1528 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1529 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1530 TREE_TYPE (DR_REF (dr))));
1531 npeel_tmp = (negative
1532 ? (mis - nelements) : (nelements - mis))
1535 /* For multiple types, it is possible that the bigger type access
1536 will have more than one peeling option. E.g., a loop with two
1537 types: one of size (vector size / 4), and the other one of
1538 size (vector size / 8). The vectorization factor will be 8. If both
1539 accesses are misaligned by 3, the first one needs one scalar
1540 iteration to be aligned, and the second one needs 5. But
1541 the first one will also be aligned by peeling 5 scalar
1542 iterations, and in that case both accesses will be aligned.
1543 Hence, except for the immediate peeling amount, we also want
1544 to try to add a full vector size, as long as we don't exceed the
1545 vectorization factor.
1546 We do this automatically when the cost model is used, since we
1547 calculate the cost for every peeling option. */
1548 if (!flag_vect_cost_model)
1549 possible_npeel_number = vf / nelements;
1551 /* Handle the aligned case. We may decide to align some other
1552 access, making DR unaligned. */
1553 if (DR_MISALIGNMENT (dr) == 0)
1556 if (!flag_vect_cost_model)
1557 possible_npeel_number++;
1560 for (j = 0; j < possible_npeel_number; j++)
1562 gcc_assert (npeel_tmp <= vf);
1563 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1564 npeel_tmp += nelements;
1567 all_misalignments_unknown = false;
1568 /* Data-ref that was chosen for the case that all the
1569 misalignments are unknown is not relevant anymore, since we
1570 have a data-ref with known alignment. */
1575 /* If we don't know all the misalignment values, we prefer
1576 peeling for the data-ref that has the maximum number of data-refs
1577 with the same alignment, unless the target prefers to align
1578 stores over loads. */
1579 if (all_misalignments_unknown)
1581 if (same_align_drs_max < VEC_length (dr_p,
1582 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1585 same_align_drs_max = VEC_length (dr_p,
1586 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1590 if (!first_store && DR_IS_WRITE (dr))
1594 /* If there are both known and unknown misaligned accesses in the
1595 loop, we choose the peeling amount according to the known misalignment.  */
1599 if (!supportable_dr_alignment)
1602 if (!first_store && DR_IS_WRITE (dr))
1609 if (!aligned_access_p (dr))
1611 if (vect_print_dump_info (REPORT_DETAILS))
1612 fprintf (vect_dump, "vector alignment may not be reachable");
1619 vect_versioning_for_alias_required
1620 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1622 /* Temporarily, if versioning for alias is required, we disable peeling
1623 until we support peeling and versioning. Often peeling for alignment
1624 will require peeling for loop-bound, which in turn requires that we
1625 know how to adjust the loop ivs after the loop. */
1626 if (vect_versioning_for_alias_required
1627 || !vect_can_advance_ivs_p (loop_vinfo)
1628 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1631 if (do_peeling && all_misalignments_unknown
1632 && vect_supportable_dr_alignment (dr0, false))
1635 /* Check if the target requires us to prefer stores over loads, i.e., if
1636 misaligned stores are more expensive than misaligned loads (taking
1637 drs with the same alignment into account). */
1638 if (first_store && DR_IS_READ (dr0))
1640 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1641 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1642 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1643 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1645 vect_get_data_access_cost (dr0, &load_inside_cost,
1646 &load_outside_cost);
1647 vect_get_data_access_cost (first_store, &store_inside_cost,
1648 &store_outside_cost);
1650 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1651 aligning the load DR0). */
1652 load_inside_penalty = store_inside_cost;
1653 load_outside_penalty = store_outside_cost;
1654 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1655 (vinfo_for_stmt (DR_STMT (first_store))),
1658 if (DR_IS_READ (dr))
1660 load_inside_penalty += load_inside_cost;
1661 load_outside_penalty += load_outside_cost;
1665 load_inside_penalty += store_inside_cost;
1666 load_outside_penalty += store_outside_cost;
1669 /* Calculate the penalty for leaving DR0 unaligned (by
1670 aligning the FIRST_STORE). */
1671 store_inside_penalty = load_inside_cost;
1672 store_outside_penalty = load_outside_cost;
1673 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1674 (vinfo_for_stmt (DR_STMT (dr0))),
1677 if (DR_IS_READ (dr))
1679 store_inside_penalty += load_inside_cost;
1680 store_outside_penalty += load_outside_cost;
1684 store_inside_penalty += store_inside_cost;
1685 store_outside_penalty += store_outside_cost;
1688 if (load_inside_penalty > store_inside_penalty
1689 || (load_inside_penalty == store_inside_penalty
1690 && load_outside_penalty > store_outside_penalty))
1694 /* In case there are only loads with different unknown misalignments, use
1695 peeling only if it may help to align other accesses in the loop. */
1696 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1697 (vinfo_for_stmt (DR_STMT (dr0))))
1698 && vect_supportable_dr_alignment (dr0, false)
1699 != dr_unaligned_supported)
1703 if (do_peeling && !dr0)
1705 /* Peeling is possible, but no data access requires alignment in order to
1706 be supported. So we try to choose the best possible peeling. */
1708 /* We should get here only if there are drs with known misalignment. */
1709 gcc_assert (!all_misalignments_unknown);
1711 /* Choose the best peeling from the hash table. */
1712 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1719 stmt = DR_STMT (dr0);
1720 stmt_info = vinfo_for_stmt (stmt);
1721 vectype = STMT_VINFO_VECTYPE (stmt_info);
1722 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1724 if (known_alignment_for_access_p (dr0))
1726 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1727 size_zero_node) < 0;
1730 /* Since it's known at compile time, compute the number of
1731 iterations in the peeled loop (the peeling factor) for use in
1732 updating DR_MISALIGNMENT values. The peeling factor is the
1733 vectorization factor minus the misalignment as an element count.  */
1735 mis = DR_MISALIGNMENT (dr0);
1736 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1737 npeel = ((negative ? mis - nelements : nelements - mis)
1741 /* For interleaved data access every iteration accesses all the
1742 members of the group, therefore we divide the number of iterations
1743 by the group size. */
1744 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1745 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1746 npeel /= GROUP_SIZE (stmt_info);
1748 if (vect_print_dump_info (REPORT_DETAILS))
1749 fprintf (vect_dump, "Try peeling by %d", npeel);
1752 /* Ensure that all data refs can be vectorized after the peel. */
1753 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1755 int save_misalignment;
1760 stmt = DR_STMT (dr);
1761 stmt_info = vinfo_for_stmt (stmt);
1762 /* For interleaving, only the alignment of the first access matters.  */
1764 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1765 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1768 save_misalignment = DR_MISALIGNMENT (dr);
1769 vect_update_misalignment_for_peel (dr, dr0, npeel);
1770 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1771 SET_DR_MISALIGNMENT (dr, save_misalignment);
1773 if (!supportable_dr_alignment)
1780 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1782 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1791 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1792 If the misalignment of DR_i is identical to that of dr0 then set
1793 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1794 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1795 by the peeling factor times the element size of DR_i (MOD the
1796 vectorization factor times the size). Otherwise, the
1797 misalignment of DR_i must be set to unknown. */
1798 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1800 vect_update_misalignment_for_peel (dr, dr0, npeel);
1802 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1804 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1806 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1807 SET_DR_MISALIGNMENT (dr0, 0);
1808 if (vect_print_dump_info (REPORT_ALIGNMENT))
1809 fprintf (vect_dump, "Alignment of access forced using peeling.");
1811 if (vect_print_dump_info (REPORT_DETAILS))
1812 fprintf (vect_dump, "Peeling for alignment will be applied.");
1814 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1821 /* (2) Versioning to force alignment. */
1823 /* Try versioning if:
1824 1) flag_tree_vect_loop_version is TRUE
1825 2) optimize loop for speed
1826 3) there is at least one unsupported misaligned data ref with an unknown misalignment, and
1828 4) all misaligned data refs with a known misalignment are supported, and
1829 5) the number of runtime alignment checks is within reason. */
1832 flag_tree_vect_loop_version
1833 && optimize_loop_nest_for_speed_p (loop)
1834 && (!loop->inner); /* FORNOW */
1838 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1840 stmt = DR_STMT (dr);
1841 stmt_info = vinfo_for_stmt (stmt);
1843 /* For interleaving, only the alignment of the first access matters.  */
1845 if (aligned_access_p (dr)
1846 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1847 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1850 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1852 if (!supportable_dr_alignment)
1858 if (known_alignment_for_access_p (dr)
1859 || VEC_length (gimple,
1860 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1861 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1863 do_versioning = false;
1867 stmt = DR_STMT (dr);
1868 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1869 gcc_assert (vectype);
1871 /* The rightmost bits of an aligned address must be zeros.
1872 Construct the mask needed for this test. For example,
1873 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1874 mask must be 15 = 0xf. */
1875 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1877 /* FORNOW: use the same mask to test all potentially unaligned
1878 references in the loop. The vectorizer currently supports
1879 a single vector size, see the reference to
1880 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1881 vectorization factor is computed. */
1882 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1883 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1884 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1885 VEC_safe_push (gimple, heap,
1886 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1891 /* Versioning requires at least one misaligned data reference. */
1892 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1893 do_versioning = false;
1894 else if (!do_versioning)
1895 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1900 VEC(gimple,heap) *may_misalign_stmts
1901 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1904 /* It can now be assumed that the data references in the statements
1905 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1906 of the loop being vectorized. */
1907 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1909 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1910 dr = STMT_VINFO_DATA_REF (stmt_info);
1911 SET_DR_MISALIGNMENT (dr, 0);
1912 if (vect_print_dump_info (REPORT_ALIGNMENT))
1913 fprintf (vect_dump, "Alignment of access forced using versioning.");
1916 if (vect_print_dump_info (REPORT_DETAILS))
1917 fprintf (vect_dump, "Versioning for alignment will be applied.");
1919 /* Peeling and versioning can't be done together at this time. */
1920 gcc_assert (! (do_peeling && do_versioning));
1922 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1927 /* This point is reached if neither peeling nor versioning is being done. */
1928 gcc_assert (! (do_peeling || do_versioning));
1930 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
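/* Illustrative sketch, not part of GCC (names hypothetical): the shape of
   the runtime guard that the versioned loop relies on, restated as a
   scalar C function.  Aligned addresses have zeros in their rightmost
   bits, so with a 16-byte vector mode (mask = 15 = 0xf) all candidate
   addresses can be OR-ed together and tested at once, which is the form
   of the check built from LOOP_VINFO_PTR_MASK above.  Assumes an address
   fits in an unsigned long.  */

static int
example_all_aligned_p (const void *p1, const void *p2, unsigned long mask)
{
  /* OR the addresses together; any set low bit means some access
     is misaligned.  */
  unsigned long bits = (unsigned long) p1 | (unsigned long) p2;
  return (bits & mask) == 0;
}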
1935 /* Function vect_find_same_alignment_drs.
1937 Update group and alignment relations according to the chosen
1938 vectorization factor. */
1941 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1942 loop_vec_info loop_vinfo)
1945 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1946 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1947 struct data_reference *dra = DDR_A (ddr);
1948 struct data_reference *drb = DDR_B (ddr);
1949 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1950 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1951 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1952 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1953 lambda_vector dist_v;
1954 unsigned int loop_depth;
1956 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1962 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1965 /* Loop-based vectorization and known data dependence. */
1966 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1969 /* Data-dependence analysis reports a distance vector of zero
1970 for data-references that overlap only in the first iteration
1971 but have different sign step (see PR45764).
1972 So as a sanity check require equal DR_STEP. */
1973 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1976 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1977 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1979 int dist = dist_v[loop_depth];
1981 if (vect_print_dump_info (REPORT_DR_DETAILS))
1982 fprintf (vect_dump, "dependence distance = %d.", dist);
1984 /* Same loop iteration. */
1985 if (dist == 0
1986 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1988 /* Two references with distance zero have the same alignment. */
1989 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1990 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1991 if (vect_print_dump_info (REPORT_ALIGNMENT))
1992 fprintf (vect_dump, "accesses have the same alignment.");
1993 if (vect_print_dump_info (REPORT_DR_DETAILS))
1995 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1996 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1997 fprintf (vect_dump, " and ");
1998 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
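/* Illustrative sketch, not part of GCC (names hypothetical): the predicate
   applied above, on plain integers.  Two data-refs see the same mutual
   alignment in the vectorized loop if their dependence distance is 0, or
   is a multiple of the vectorization factor while both elements have the
   same size.  E.g. with VF = 4, the int accesses a[i] and a[i+8]
   (distance 8) keep the same relative alignment in every iteration.  */

static int
example_same_alignment_p (int dist, int vectorization_factor,
                          int dra_size, int drb_size)
{
  return dist == 0
         || (dist % vectorization_factor == 0 && dra_size == drb_size);
}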
2005 /* Function vect_analyze_data_refs_alignment
2007 Analyze the alignment of the data-references in the loop.
2008 Return FALSE if a data reference is found that cannot be vectorized. */
2011 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2012 bb_vec_info bb_vinfo)
2014 if (vect_print_dump_info (REPORT_DETAILS))
2015 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2017 /* Mark groups of data references with same alignment using
2018 data dependence information. */
2021 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2022 struct data_dependence_relation *ddr;
2025 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2026 vect_find_same_alignment_drs (ddr, loop_vinfo);
2029 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2031 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2033 "not vectorized: can't calculate alignment for data ref.");
2041 /* Analyze groups of strided accesses: check that DR belongs to a group of
2042 strided accesses of legal size, step, etc. Detect gaps, single element
2043 interleaving, and other special cases. Set strided access info.
2044 Collect groups of strided stores for further use in SLP analysis. */
2047 vect_analyze_group_access (struct data_reference *dr)
2049 tree step = DR_STEP (dr);
2050 tree scalar_type = TREE_TYPE (DR_REF (dr));
2051 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2052 gimple stmt = DR_STMT (dr);
2053 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2054 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2055 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2056 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2057 HOST_WIDE_INT stride, last_accessed_element = 1;
2058 bool slp_impossible = false;
2060 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2061 interleaving group (including gaps). */
2062 stride = dr_step / type_size;
2064 /* A non-consecutive access is possible only if it is part of an interleaving group. */
2065 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2067 /* Check if this DR is part of an interleaving group, and is a single
2068 element of the group that is accessed in the loop. */
2070 /* Gaps are supported only for loads. STEP must be a multiple of the type
2071 size. The size of the group must be a power of 2. */
2073 && (dr_step % type_size) == 0
2075 && exact_log2 (stride) != -1)
2077 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2078 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2079 if (vect_print_dump_info (REPORT_DR_DETAILS))
2081 fprintf (vect_dump, "Detected single element interleaving ");
2082 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2083 fprintf (vect_dump, " step ");
2084 print_generic_expr (vect_dump, step, TDF_SLIM);
2089 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2091 if (vect_print_dump_info (REPORT_DETAILS))
2092 fprintf (vect_dump, "Data access with gaps requires scalar "
2093 "epilogue loop");
2099 if (vect_print_dump_info (REPORT_DETAILS))
2101 fprintf (vect_dump, "not consecutive access ");
2102 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2107 /* Mark the statement as unvectorizable. */
2108 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2115 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2117 /* First stmt in the interleaving chain. Check the chain. */
2118 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2119 struct data_reference *data_ref = dr;
2120 unsigned int count = 1;
2122 tree prev_init = DR_INIT (data_ref);
2124 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2128 /* Skip same data-refs. In case two or more stmts share a
2129 data-ref (supported only for loads), we vectorize only the first
2130 stmt, and the rest get their vectorized loads from the first
2131 one. */
2132 if (!tree_int_cst_compare (DR_INIT (data_ref),
2133 DR_INIT (STMT_VINFO_DATA_REF (
2134 vinfo_for_stmt (next)))))
2136 if (DR_IS_WRITE (data_ref))
2138 if (vect_print_dump_info (REPORT_DETAILS))
2139 fprintf (vect_dump, "Two store stmts share the same dr.");
2143 /* Check that there are no load-store dependencies for these loads,
2144 to prevent a load-store-load sequence to the same location. */
2145 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2146 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2148 if (vect_print_dump_info (REPORT_DETAILS))
2150 "READ_WRITE dependence in interleaving.");
2154 /* For a load, reuse the data-ref of the first load in the group. */
2155 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2158 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2164 /* Check that all the accesses have the same STEP. */
2165 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2166 if (tree_int_cst_compare (step, next_step))
2168 if (vect_print_dump_info (REPORT_DETAILS))
2169 fprintf (vect_dump, "not consecutive access in interleaving");
2173 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2174 /* Check that the distance between two accesses is equal to the type
2175 size. Otherwise, we have gaps. */
2176 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2177 - TREE_INT_CST_LOW (prev_init)) / type_size;
2180 /* FORNOW: SLP of accesses with gaps is not supported. */
2181 slp_impossible = true;
2182 if (DR_IS_WRITE (data_ref))
2184 if (vect_print_dump_info (REPORT_DETAILS))
2185 fprintf (vect_dump, "interleaved store with gaps");
2192 last_accessed_element += diff;
2194 /* Store the gap from the previous member of the group. If there is no
2195 gap in the access, GROUP_GAP is always 1. */
2196 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2198 prev_init = DR_INIT (data_ref);
2199 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2200 /* Count the number of data-refs in the chain. */
2204 /* COUNT is the number of accesses found; we multiply it by the size of
2205 the type to get COUNT_IN_BYTES. */
2206 count_in_bytes = type_size * count;
2208 /* Check that the size of the interleaving (including gaps) is not
2209 greater than STEP. */
2210 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2212 if (vect_print_dump_info (REPORT_DETAILS))
2214 fprintf (vect_dump, "interleaving size is greater than step for ");
2215 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2220 /* Check that the size of the interleaving is equal to STEP for stores,
2221 i.e., that there are no gaps. */
2222 if (dr_step && dr_step != count_in_bytes)
2224 if (DR_IS_READ (dr))
2226 slp_impossible = true;
2227 /* There is a gap after the last load in the group. This gap is the
2228 difference between the stride and the number of elements. When
2229 there is no gap, this difference should be 0. */
2230 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2234 if (vect_print_dump_info (REPORT_DETAILS))
2235 fprintf (vect_dump, "interleaved store with gaps");
2240 /* Check that STEP is a multiple of type size. */
2241 if (dr_step && (dr_step % type_size) != 0)
2243 if (vect_print_dump_info (REPORT_DETAILS))
2245 fprintf (vect_dump, "step is not a multiple of type size: step ");
2246 print_generic_expr (vect_dump, step, TDF_SLIM);
2247 fprintf (vect_dump, " size ");
2248 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2257 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2258 if (vect_print_dump_info (REPORT_DETAILS))
2259 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2261 /* SLP: create an SLP data structure for every interleaving group of
2262 stores for further analysis in vect_analyze_slp. */
2263 if (DR_IS_WRITE (dr) && !slp_impossible)
2265 if (loop_vinfo)
2266 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2267 stmt);
2268 else
2269 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2270 stmt);
2273 /* There is a gap at the end of the group. */
2274 if (stride - last_accessed_element > 0 && loop_vinfo)
2276 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2277 if (vect_print_dump_info (REPORT_DETAILS))
2278 fprintf (vect_dump, "Data access with gaps requires scalar "
2279 "epilogue loop");
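/* Illustrative sketch, not part of GCC (names hypothetical): the size
   bookkeeping of the group analysis above, on plain integers.  For int
   loads a[4i], a[4i+1], a[4i+2] we get dr_step = 16, type_size = 4,
   count = 3 and no interior gaps; the checks below pass with stride = 4,
   and the trailing gap (stride - count = 1) makes this usable for loads
   but not for interleaved stores.  Returns the group size in elements,
   or 0 on failure.  */

static long
example_group_size_ok_p (long dr_step, long type_size, long count, long gaps)
{
  long stride = dr_step / type_size;     /* group size in elements */
  long count_in_bytes = count * type_size;

  /* The interleaving, including interior gaps, must fit in one step.  */
  if (dr_step && dr_step < count_in_bytes + gaps * type_size)
    return 0;

  /* The step must be a whole number of elements.  */
  if (dr_step && dr_step % type_size != 0)
    return 0;

  return stride;
}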
2287 /* Analyze the access pattern of the data-reference DR.
2288 In case of non-consecutive accesses, call vect_analyze_group_access() to
2289 analyze groups of strided accesses. */
2292 vect_analyze_data_ref_access (struct data_reference *dr)
2294 tree step = DR_STEP (dr);
2295 tree scalar_type = TREE_TYPE (DR_REF (dr));
2296 gimple stmt = DR_STMT (dr);
2297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2298 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2299 struct loop *loop = NULL;
2300 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2302 if (loop_vinfo)
2303 loop = LOOP_VINFO_LOOP (loop_vinfo);
2305 if (loop_vinfo && !step)
2307 if (vect_print_dump_info (REPORT_DETAILS))
2308 fprintf (vect_dump, "bad data-ref access in loop");
2312 /* Allow invariant loads in loops. */
2313 if (loop_vinfo && dr_step == 0)
2315 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2316 return DR_IS_READ (dr);
2319 if (loop && nested_in_vect_loop_p (loop, stmt))
2321 /* Interleaved accesses are not yet supported within outer-loop
2322 vectorization for references in the inner-loop. */
2323 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2325 /* For the rest of the analysis we use the outer-loop step. */
2326 step = STMT_VINFO_DR_STEP (stmt_info);
2327 dr_step = TREE_INT_CST_LOW (step);
2331 if (vect_print_dump_info (REPORT_ALIGNMENT))
2332 fprintf (vect_dump, "zero step in outer loop.");
2333 if (DR_IS_READ (dr))
2341 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2342 || (dr_step < 0
2343 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2345 /* Mark that it is not interleaving. */
2346 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2350 if (loop && nested_in_vect_loop_p (loop, stmt))
2352 if (vect_print_dump_info (REPORT_ALIGNMENT))
2353 fprintf (vect_dump, "strided access in outer loop.");
2357 /* Non-consecutive access - check if it is part of an interleaving group. */
2358 return vect_analyze_group_access (dr);
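/* Illustrative sketch, not part of GCC (names hypothetical): the access
   classification above, in terms of byte quantities.  A step equal to
   the element size (or to its negation, for reverse accesses) is a
   consecutive access; a zero step is an invariant access; anything else
   is handed to the group (interleaving) analysis.  */

enum example_access_kind { EX_INVARIANT, EX_CONSECUTIVE, EX_GROUP };

static enum example_access_kind
example_classify_access (long dr_step, long type_size)
{
  if (dr_step == 0)
    return EX_INVARIANT;
  if (dr_step == type_size || dr_step == -type_size)
    return EX_CONSECUTIVE;
  return EX_GROUP;
}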
2362 /* Function vect_analyze_data_ref_accesses.
2364 Analyze the access pattern of all the data references in the loop.
2366 FORNOW: the only access pattern that is considered vectorizable is a
2367 simple step 1 (consecutive) access.
2369 FORNOW: handle only arrays and pointer accesses. */
2372 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2375 VEC (data_reference_p, heap) *datarefs;
2376 struct data_reference *dr;
2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2381 if (loop_vinfo)
2382 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2383 else
2384 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2386 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2387 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2388 && !vect_analyze_data_ref_access (dr))
2390 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2391 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2395 /* Mark the statement as not vectorizable. */
2396 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2406 /* Function vect_prune_runtime_alias_test_list.
2408 Prune a list of ddrs to be tested at run-time by versioning for alias.
2409 Return FALSE if the resulting list of ddrs is longer than allowed by
2410 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2413 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2415 VEC (ddr_p, heap) * ddrs =
2416 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2419 if (vect_print_dump_info (REPORT_DETAILS))
2420 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2422 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2427 ddr_i = VEC_index (ddr_p, ddrs, i);
2430 for (j = 0; j < i; j++)
2432 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2434 if (vect_vfa_range_equal (ddr_i, ddr_j))
2436 if (vect_print_dump_info (REPORT_DR_DETAILS))
2438 fprintf (vect_dump, "found equal ranges ");
2439 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2440 fprintf (vect_dump, ", ");
2441 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2442 fprintf (vect_dump, " and ");
2443 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2444 fprintf (vect_dump, ", ");
2445 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2454 VEC_ordered_remove (ddr_p, ddrs, i);
2460 if (VEC_length (ddr_p, ddrs) >
2461 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2463 if (vect_print_dump_info (REPORT_DR_DETAILS))
2466 "disable versioning for alias - max number of generated "
2467 "checks exceeded.");
2470 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
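/* Illustrative sketch, not part of GCC (names hypothetical; range_equal_p
   stands in for vect_vfa_range_equal): the pruning loop above on a plain
   array.  Entries whose runtime alias check would cover the same address
   ranges as an earlier entry are removed in order, and the caller can
   then compare the remaining count against the
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS limit.  Returns the new
   length.  */

static unsigned
example_prune_equal_ranges (int *checks, unsigned n,
                            int (*range_equal_p) (int, int))
{
  unsigned i = 0, j, k;

  while (i < n)
    {
      int found = 0;
      for (j = 0; j < i; j++)
        if (range_equal_p (checks[i], checks[j]))
          {
            found = 1;
            break;
          }
      if (!found)
        {
          i++;
          continue;
        }
      /* Ordered removal, as VEC_ordered_remove does above.  */
      for (k = i; k + 1 < n; k++)
        checks[k] = checks[k + 1];
      n--;
    }
  return n;
}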
2479 /* Function vect_analyze_data_refs.
2481 Find all the data references in the loop or basic block.
2483 The general structure of the analysis of data refs in the vectorizer is as
2484 follows:
2485 1- vect_analyze_data_refs(loop/bb): call
2486 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2487 in the loop/bb and their dependences.
2488 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2489 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2490 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2495 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2496 bb_vec_info bb_vinfo,
2499 struct loop *loop = NULL;
2500 basic_block bb = NULL;
2502 VEC (data_reference_p, heap) *datarefs;
2503 struct data_reference *dr;
2507 if (vect_print_dump_info (REPORT_DETAILS))
2508 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2512 loop = LOOP_VINFO_LOOP (loop_vinfo);
2513 res = compute_data_dependences_for_loop
2515 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2516 &LOOP_VINFO_DATAREFS (loop_vinfo),
2517 &LOOP_VINFO_DDRS (loop_vinfo));
2521 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2522 fprintf (vect_dump, "not vectorized: loop contains function calls"
2523 " or data references that cannot be analyzed");
2527 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2531 bb = BB_VINFO_BB (bb_vinfo);
2532 res = compute_data_dependences_for_bb (bb, true,
2533 &BB_VINFO_DATAREFS (bb_vinfo),
2534 &BB_VINFO_DDRS (bb_vinfo));
2537 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2538 fprintf (vect_dump, "not vectorized: basic block contains function"
2539 " calls or data references that cannot be analyzed");
2543 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2546 /* Go through the data-refs, check that the analysis succeeded. Update
2547 pointer from stmt_vec_info struct to DR and vectype. */
2549 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2552 stmt_vec_info stmt_info;
2553 tree base, offset, init;
2556 if (!dr || !DR_REF (dr))
2558 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2559 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2563 stmt = DR_STMT (dr);
2564 stmt_info = vinfo_for_stmt (stmt);
2566 /* Check that analysis of the data-ref succeeded. */
2567 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2570 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2572 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2573 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2578 /* Mark the statement as not vectorizable. */
2579 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2586 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2588 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2589 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2593 /* Mark the statement as not vectorizable. */
2594 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2601 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2603 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2605 fprintf (vect_dump, "not vectorized: volatile type ");
2606 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2611 base = unshare_expr (DR_BASE_ADDRESS (dr));
2612 offset = unshare_expr (DR_OFFSET (dr));
2613 init = unshare_expr (DR_INIT (dr));
2615 if (stmt_can_throw_internal (stmt))
2617 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2619 fprintf (vect_dump, "not vectorized: statement can throw an "
2621 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2626 /* Update DR field in stmt_vec_info struct. */
2628 /* If the dataref is in an inner-loop of the loop that is considered
2629 for vectorization, we also want to analyze the access relative to
2630 the outer-loop (DR contains information only relative to the
2631 inner-most enclosing loop). We do that by building a reference to the
2632 first location accessed by the inner-loop, and analyze it relative to
2634 if (loop && nested_in_vect_loop_p (loop, stmt))
2636 tree outer_step, outer_base, outer_init;
2637 HOST_WIDE_INT pbitsize, pbitpos;
2639 enum machine_mode pmode;
2640 int punsignedp, pvolatilep;
2641 affine_iv base_iv, offset_iv;
2644 /* Build a reference to the first location accessed by the
2645 inner-loop: *(BASE+INIT). (The first location is actually
2646 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2647 tree inner_base = build_fold_indirect_ref
2648 (fold_build2 (POINTER_PLUS_EXPR,
2649 TREE_TYPE (base), base,
2650 fold_convert (sizetype, init)));
2652 if (vect_print_dump_info (REPORT_DETAILS))
2654 fprintf (vect_dump, "analyze in outer-loop: ");
2655 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2658 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2659 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2660 gcc_assert (outer_base != NULL_TREE);
2662 if (pbitpos % BITS_PER_UNIT != 0)
2664 if (vect_print_dump_info (REPORT_DETAILS))
2665 fprintf (vect_dump, "failed: bit offset alignment.\n");
2669 outer_base = build_fold_addr_expr (outer_base);
2670 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2673 if (vect_print_dump_info (REPORT_DETAILS))
2674 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2681 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2689 offset_iv.base = ssize_int (0);
2690 offset_iv.step = ssize_int (0);
2692 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2695 if (vect_print_dump_info (REPORT_DETAILS))
2696 fprintf (vect_dump, "evolution of offset is not affine.\n");
2700 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2701 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2702 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2703 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2704 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2706 outer_step = size_binop (PLUS_EXPR,
2707 fold_convert (ssizetype, base_iv.step),
2708 fold_convert (ssizetype, offset_iv.step));
2710 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2711 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2712 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2713 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2714 STMT_VINFO_DR_OFFSET (stmt_info) =
2715 fold_convert (ssizetype, offset_iv.base);
2716 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2717 size_int (highest_pow2_factor (offset_iv.base));
2719 if (vect_print_dump_info (REPORT_DETAILS))
2721 fprintf (vect_dump, "\touter base_address: ");
2722 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2723 fprintf (vect_dump, "\n\touter offset from base address: ");
2724 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2725 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2726 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2727 fprintf (vect_dump, "\n\touter step: ");
2728 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2729 fprintf (vect_dump, "\n\touter aligned to: ");
2730 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2734 if (STMT_VINFO_DATA_REF (stmt_info))
2736 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2739 "not vectorized: more than one data ref in stmt: ");
2740 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2745 STMT_VINFO_DATA_REF (stmt_info) = dr;
2747 /* Set vectype for STMT. */
2748 scalar_type = TREE_TYPE (DR_REF (dr));
2749 STMT_VINFO_VECTYPE (stmt_info) =
2750 get_vectype_for_scalar_type (scalar_type);
2751 if (!STMT_VINFO_VECTYPE (stmt_info))
2753 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2756 "not vectorized: no vectype for stmt: ");
2757 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2758 fprintf (vect_dump, " scalar_type: ");
2759 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2764 /* Mark the statement as not vectorizable. */
2765 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2772 /* Adjust the minimal vectorization factor according to the
2773 vector type. */
2774 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
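/* Illustrative sketch, not part of GCC (names hypothetical): what
   TYPE_VECTOR_SUBPARTS contributes to the vectorization factor.  The
   number of subparts is the vector size divided by the element size:
   4 for 32-bit ints in a 16-byte vector (V4SI), 8 for 16-bit shorts
   (V8HI).  Roughly, the loop's VF must accommodate the subparts of
   every data-ref's vectype.  */

static long
example_vector_subparts (long vector_size_bytes, long elem_size_bytes)
{
  return vector_size_bytes / elem_size_bytes;
}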
2783 /* Function vect_get_new_vect_var.
2785 Returns a name for a new variable. The current naming scheme uses the
2786 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) for
2787 vectorizer generated variables, and appends NAME to the prefix, if
2788 provided. */
2791 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2798 case vect_simple_var:
2801 case vect_scalar_var:
2804 case vect_pointer_var:
2813 char* tmp = concat (prefix, name, NULL);
2814 new_vect_var = create_tmp_var (type, tmp);
2818 new_vect_var = create_tmp_var (type, prefix);
2820 /* Mark vector typed variable as a gimple register variable. */
2821 if (TREE_CODE (type) == VECTOR_TYPE)
2822 DECL_GIMPLE_REG_P (new_vect_var) = true;
2824 return new_vect_var;
2828 /* Function vect_create_addr_base_for_vector_ref.
2830 Create an expression that computes the address of the first memory location
2831 that will be accessed for a data reference.
2834 STMT: The statement containing the data reference.
2835 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2836 OFFSET: Optional. If supplied, it is added to the initial address.
2837 LOOP: Specify relative to which loop nest the address should be computed.
2838 For example, when the dataref is in an inner-loop nested in an
2839 outer-loop that is now being vectorized, LOOP can be either the
2840 outer-loop, or the inner-loop. The first memory location accessed
2841 by the following dataref ('in' points to short):
2848 if LOOP=i_loop: &in (relative to i_loop)
2849 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2852 1. Return an SSA_NAME whose value is the address of the memory location of
2853 the first vector of the data reference.
2854 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2855 these statement(s) which define the returned SSA_NAME.
2857 FORNOW: We are only handling array accesses with step 1. */
2860 vect_create_addr_base_for_vector_ref (gimple stmt,
2861 gimple_seq *new_stmt_list,
2865 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2866 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2867 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2869 tree data_ref_base_var;
2871 tree addr_base, addr_expr;
2873 gimple_seq seq = NULL;
2874 tree base_offset = unshare_expr (DR_OFFSET (dr));
2875 tree init = unshare_expr (DR_INIT (dr));
2877 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2878 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2881 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2883 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2885 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2887 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2888 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2889 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2893 base_name = build_fold_indirect_ref (data_ref_base);
2896 base_offset = ssize_int (0);
2897 init = ssize_int (0);
2898 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2901 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2902 add_referenced_var (data_ref_base_var);
2903 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2905 gimple_seq_add_seq (new_stmt_list, seq);
2907 /* Create base_offset */
2908 base_offset = size_binop (PLUS_EXPR,
2909 fold_convert (sizetype, base_offset),
2910 fold_convert (sizetype, init));
2911 dest = create_tmp_var (sizetype, "base_off");
2912 add_referenced_var (dest);
2913 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2914 gimple_seq_add_seq (new_stmt_list, seq);
2918 tree tmp = create_tmp_var (sizetype, "offset");
2920 add_referenced_var (tmp);
2921 offset = fold_build2 (MULT_EXPR, sizetype,
2922 fold_convert (sizetype, offset), step);
2923 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2924 base_offset, offset);
2925 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2926 gimple_seq_add_seq (new_stmt_list, seq);
2929 /* base + base_offset */
2931 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2932 data_ref_base, base_offset);
2935 addr_base = build1 (ADDR_EXPR,
2936 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2937 unshare_expr (DR_REF (dr)));
2940 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2941 base = get_base_address (DR_REF (dr));
2942 if (base
2943 && TREE_CODE (base) == MEM_REF)
2944 vect_ptr_type
2945 = build_qualified_type (vect_ptr_type,
2946 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2948 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2949 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2950 get_name (base_name));
2951 add_referenced_var (addr_expr);
2952 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2953 gimple_seq_add_seq (new_stmt_list, seq);
2955 if (DR_PTR_INFO (dr)
2956 && TREE_CODE (vec_stmt) == SSA_NAME)
2958 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2961 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2962 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2966 if (vect_print_dump_info (REPORT_DETAILS))
2968 fprintf (vect_dump, "created ");
2969 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
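/* Illustrative sketch, not part of GCC (names hypothetical): the address
   arithmetic that the generated statements perform, in plain C.  The
   returned address is DR_BASE_ADDRESS plus DR_OFFSET plus DR_INIT, with
   the optional OFFSET argument scaled by the element size first.  */

static char *
example_addr_base (char *base, long dr_offset, long init,
                   long offset, long elem_size)
{
  long base_off = dr_offset + init + offset * elem_size;
  return base + base_off;
}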
2976 /* Function vect_create_data_ref_ptr.
2978 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2979 location accessed in the loop by STMT, along with the def-use update
2980 chain to appropriately advance the pointer through the loop iterations.
2981 Also set aliasing information for the pointer. This pointer is used by
2982 the callers to this function to create a memory reference expression for
2983 vector load/store access.
2986 1. STMT: a stmt that references memory. Expected to be of the form
2987 GIMPLE_ASSIGN <name, data-ref> or
2988 GIMPLE_ASSIGN <data-ref, name>.
2989 2. AGGR_TYPE: the type of the reference, which should be either a vector
2990 or an array.
2991 3. AT_LOOP: the loop where the vector memref is to be created.
2992 4. OFFSET (optional): an offset to be added to the initial address accessed
2993 by the data-ref in STMT.
2994 5. BSI: location where the new stmts are to be placed if there is no loop
2995 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2996 pointing to the initial address.
2999 1. Declare a new ptr to vector_type, and have it point to the base of the
3000 data reference (the initial address accessed by the data reference).
3001 For example, for vector of type V8HI, the following code is generated:
3004 ap = (v8hi *)initial_address;
3006 if OFFSET is not supplied:
3007 initial_address = &a[init];
3008 if OFFSET is supplied:
3009 initial_address = &a[init + OFFSET];
3011 Return the initial_address in INITIAL_ADDRESS.
3013 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3014 update the pointer in each iteration of the loop.
3016 Return the increment stmt that updates the pointer in PTR_INCR.
3018 3. Set INV_P to true if the access pattern of the data reference in the
3019 vectorized loop is invariant. Set it to false otherwise.
3021 4. Return the pointer. */
3024 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3025 tree offset, tree *initial_address,
3026 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3027 bool only_init, bool *inv_p)
3030 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3032 struct loop *loop = NULL;
3033 bool nested_in_vect_loop = false;
3034 struct loop *containing_loop = NULL;
3039 gimple_seq new_stmt_list = NULL;
3043 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3045 gimple_stmt_iterator incr_gsi;
3048 tree indx_before_incr, indx_after_incr;
3051 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3054 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3055 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3059 loop = LOOP_VINFO_LOOP (loop_vinfo);
3060 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3061 containing_loop = (gimple_bb (stmt))->loop_father;
3062 pe = loop_preheader_edge (loop);
3066 gcc_assert (bb_vinfo);
3071 /* Check the step (evolution) of the load in LOOP, and record
3072 whether it's invariant. */
3073 if (nested_in_vect_loop)
3074 step = STMT_VINFO_DR_STEP (stmt_info);
3075 else
3076 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3078 if (tree_int_cst_compare (step, size_zero_node) == 0)
3079 *inv_p = true;
3080 else
3081 *inv_p = false;
3082 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3084 /* Create an expression for the first address accessed by this load
3085 in LOOP. */
3086 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3088 if (vect_print_dump_info (REPORT_DETAILS))
3090 tree data_ref_base = base_name;
3091 fprintf (vect_dump, "create %s-pointer variable to type: ",
3092 tree_code_name[(int) TREE_CODE (aggr_type)]);
3093 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3094 if (TREE_CODE (data_ref_base) == VAR_DECL
3095 || TREE_CODE (data_ref_base) == ARRAY_REF)
3096 fprintf (vect_dump, " vectorizing an array ref: ");
3097 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3098 fprintf (vect_dump, " vectorizing a record based array ref: ");
3099 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3100 fprintf (vect_dump, " vectorizing a pointer ref: ");
3101 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3104 /* (1) Create the new aggregate-pointer variable. */
3105 aggr_ptr_type = build_pointer_type (aggr_type);
3106 base = get_base_address (DR_REF (dr));
3107 if (base
3108 && TREE_CODE (base) == MEM_REF)
3109 aggr_ptr_type
3110 = build_qualified_type (aggr_ptr_type,
3111 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3112 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3113 get_name (base_name));
3115 /* Vector and array types inherit the alias set of their component
3116 type by default so we need to use a ref-all pointer if the data
3117 reference does not conflict with the created aggregated data
3118 reference because it is not addressable. */
3119 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3120 get_alias_set (DR_REF (dr))))
3123 = build_pointer_type_for_mode (aggr_type,
3124 TYPE_MODE (aggr_ptr_type), true);
3125 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3126 get_name (base_name));
3129 /* Likewise for any of the data references in the stmt group. */
3130 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3132 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3135 tree lhs = gimple_assign_lhs (orig_stmt);
3136 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3137 get_alias_set (lhs)))
3140 = build_pointer_type_for_mode (aggr_type,
3141 TYPE_MODE (aggr_ptr_type), true);
3143 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3144 get_name (base_name));
3148 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3153 add_referenced_var (aggr_ptr);
3155 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3156 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3157 def-use update cycles for the pointer: one relative to the outer-loop
3158 (LOOP), which is what steps (3) and (4) below do. The other is relative
3159 to the inner-loop (which is the inner-most loop containing the dataref),
3160 and this is done by step (5) below.
3162 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3163 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3164 redundant. Steps (3),(4) create the following:
3167 LOOP: vp1 = phi(vp0,vp2)
3173 If there is an inner-loop nested in loop, then step (5) will also be
3174 applied, and an additional update in the inner-loop will be created:
3177 LOOP: vp1 = phi(vp0,vp2)
3179 inner: vp3 = phi(vp1,vp4)
3180 vp4 = vp3 + inner_step
3186 /* (2) Calculate the initial address of the aggregate-pointer, and set
3187 the aggregate-pointer to point to it before the loop. */
3189 /* Create &(base[init_val+offset]) in the loop preheader. */
3191 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3197 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3198 gcc_assert (!new_bb);
3201 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3204 *initial_address = new_temp;
3206 /* Create: p = (aggr_type *) initial_base */
3207 if (TREE_CODE (new_temp) != SSA_NAME
3208 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3210 vec_stmt = gimple_build_assign (aggr_ptr,
3211 fold_convert (aggr_ptr_type, new_temp));
3212 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3213 /* Copy the points-to information if it exists. */
3214 if (DR_PTR_INFO (dr))
3215 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3216 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3219 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3220 gcc_assert (!new_bb);
3223 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3226 aggr_ptr_init = new_temp;
3228 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3229 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3230 inner-loop nested in LOOP (during outer-loop vectorization). */
3232 /* No update in loop is required. */
3233 if (only_init && (!loop_vinfo || at_loop == loop))
3234 aptr = aggr_ptr_init;
3237 /* The step of the aggregate pointer is the type size. */
3238 tree step = TYPE_SIZE_UNIT (aggr_type);
3239 /* One exception to the above is when the scalar step of the load in
3240 LOOP is zero. In this case the step here is also zero. */
3242 step = size_zero_node;
3244 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3246 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3248 create_iv (aggr_ptr_init,
3249 fold_convert (aggr_ptr_type, step),
3250 aggr_ptr, loop, &incr_gsi, insert_after,
3251 &indx_before_incr, &indx_after_incr);
3252 incr = gsi_stmt (incr_gsi);
3253 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3255 /* Copy the points-to information if it exists. */
3256 if (DR_PTR_INFO (dr))
3258 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3259 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3264 aptr = indx_before_incr;
3267 if (!nested_in_vect_loop || only_init)
3271 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3272 nested in LOOP, if it exists. */
3274 gcc_assert (nested_in_vect_loop);
3277 standard_iv_increment_position (containing_loop, &incr_gsi,
3279 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3280 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3282 incr = gsi_stmt (incr_gsi);
3283 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3285 /* Copy the points-to information if it exists. */
3286 if (DR_PTR_INFO (dr))
3288 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3289 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3294 return indx_before_incr;
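/* Illustrative sketch, not part of GCC (names hypothetical): the def-use
   cycles created above, written as ordinary scalar loops.  For inner-most
   loop vectorization only the outer update exists; for outer-loop
   vectorization the pointer additionally advances through the inner loop
   by the original DR_STEP.  */

static void
example_pointer_ivs (char *vp0, long n_outer, long n_inner,
                     long outer_step, long inner_step)
{
  char *vp1, *vp3;
  long i, j;

  for (i = 0, vp1 = vp0; i < n_outer; i++, vp1 += outer_step)
    /* vp2 = vp1 + outer_step at the loop latch.  */
    for (j = 0, vp3 = vp1; j < n_inner; j++, vp3 += inner_step)
      /* vp4 = vp3 + inner_step; memory is accessed through vp3.  */
      ;
}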
3301 /* Function bump_vector_ptr
3303 Increment a pointer (to a vector type) by vector-size. If requested,
3304 i.e. if PTR_INCR is given, then also connect the new increment stmt
3305 to the existing def-use update-chain of the pointer, by modifying
3306 the PTR_INCR as illustrated below:
3308 The pointer def-use update-chain before this function:
3309 DATAREF_PTR = phi (p_0, p_2)
3311 PTR_INCR: p_2 = DATAREF_PTR + step
3313 The pointer def-use update-chain after this function:
3314 DATAREF_PTR = phi (p_0, p_2)
3316 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3318 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3321 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3323 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3324 the loop. The increment amount across iterations is expected
3325 to be vector_size.
3326 BSI - location where the new update stmt is to be placed.
3327 STMT - the original scalar memory-access stmt that is being vectorized.
3328 BUMP - optional. The offset by which to bump the pointer. If not given,
3329 the offset is assumed to be vector_size.
3331 Output: Return NEW_DATAREF_PTR as illustrated above.
3336 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3337 gimple stmt, tree bump)
3339 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3340 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3341 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3342 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3343 tree update = TYPE_SIZE_UNIT (vectype);
3346 use_operand_p use_p;
3347 tree new_dataref_ptr;
3352 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3353 dataref_ptr, update);
3354 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3355 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3356 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3358 /* Copy the points-to information if it exists. */
3359 if (DR_PTR_INFO (dr))
3361 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3362 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3363 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3367 return new_dataref_ptr;
3369 /* Update the vector-pointer's cross-iteration increment. */
3370 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3372 tree use = USE_FROM_PTR (use_p);
3374 if (use == dataref_ptr)
3375 SET_USE (use_p, new_dataref_ptr);
3377 gcc_assert (tree_int_cst_compare (use, update) == 0);
3380 return new_dataref_ptr;
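/* Illustrative sketch, not part of GCC (names hypothetical): the pointer
   bump in scalar form.  The bumped pointer feeds the next access in the
   same iteration, and the pre-existing cross-iteration increment is
   redirected to start from it (the SET_USE rewiring above), so the total
   advance per iteration is unchanged.  */

static char *
example_bump_ptr (char *dataref_ptr, long bump, char **incr_operand)
{
  char *new_dataref_ptr = dataref_ptr + bump;
  *incr_operand = new_dataref_ptr;   /* redirect the existing increment */
  return new_dataref_ptr;
}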
3384 /* Function vect_create_destination_var.
3386 Create a new temporary of type VECTYPE. */
3389 vect_create_destination_var (tree scalar_dest, tree vectype)
3392 const char *new_name;
3394 enum vect_var_kind kind;
3396 kind = vectype ? vect_simple_var : vect_scalar_var;
3397 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3399 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3401 new_name = get_name (scalar_dest);
3404 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3405 add_referenced_var (vec_dest);
3410 /* Function vect_strided_store_supported.
3412 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3413 and FALSE otherwise. */
3416 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3418 optab interleave_high_optab, interleave_low_optab;
3419 enum machine_mode mode;
3421 mode = TYPE_MODE (vectype);
3423 /* vect_permute_store_chain requires the group size to be a power of two. */
3424 if (exact_log2 (count) == -1)
3426 if (vect_print_dump_info (REPORT_DETAILS))
3427 fprintf (vect_dump, "the size of the group of strided accesses"
3428 " is not a power of 2");
3432 /* Check that the operation is supported. */
3433 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3434 vectype, optab_default);
3435 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3436 vectype, optab_default);
3437 if (!interleave_high_optab || !interleave_low_optab)
3439 if (vect_print_dump_info (REPORT_DETAILS))
3440 fprintf (vect_dump, "no optab for interleave.");
3444 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3445 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3447 if (vect_print_dump_info (REPORT_DETAILS))
3448 fprintf (vect_dump, "interleave op not supported by target.");
3456 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3457 type VECTYPE. */
3460 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3462 return vect_lanes_optab_supported_p ("vec_store_lanes",
3463 vec_store_lanes_optab,
3468 /* Function vect_permute_store_chain.
3470 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3471 a power of 2, generate interleave_high/low stmts to reorder the data
3472 correctly for the stores. Return the final references for stores in
3475 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3476 The input is 4 vectors each containing 8 elements. We assign a number to
3477 each element; the input sequence is:
3479 1st vec: 0 1 2 3 4 5 6 7
3480 2nd vec: 8 9 10 11 12 13 14 15
3481 3rd vec: 16 17 18 19 20 21 22 23
3482 4th vec: 24 25 26 27 28 29 30 31
3484 The output sequence should be:
3486 1st vec: 0 8 16 24 1 9 17 25
3487 2nd vec: 2 10 18 26 3 11 19 27
3488 3rd vec: 4 12 20 28 5 13 21 29
3489 4th vec: 6 14 22 30 7 15 23 31
3491 i.e., we interleave the contents of the four vectors in their order.
3493 We use interleave_high/low instructions to create such output. The input of
3494 each interleave_high/low operation is two vectors:
3497 the even elements of the result vector are obtained left-to-right from the
3498 high/low elements of the first vector. The odd elements of the result are
3499 obtained left-to-right from the high/low elements of the second vector.
3500 The output of interleave_high will be: 0 4 1 5
3501 and of interleave_low: 2 6 3 7
3504 The permutation is done in log LENGTH stages. In each stage interleave_high
3505 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3506 where the first argument is taken from the first half of DR_CHAIN and the
3507 second argument from its second half.
3510 I1: interleave_high (1st vec, 3rd vec)
3511 I2: interleave_low (1st vec, 3rd vec)
3512 I3: interleave_high (2nd vec, 4th vec)
3513 I4: interleave_low (2nd vec, 4th vec)
3515 The output for the first stage is:
3517 I1: 0 16 1 17 2 18 3 19
3518 I2: 4 20 5 21 6 22 7 23
3519 I3: 8 24 9 25 10 26 11 27
3520 I4: 12 28 13 29 14 30 15 31
3522 The output of the second stage, i.e. the final result is:
3524 I1: 0 8 16 24 1 9 17 25
3525 I2: 2 10 18 26 3 11 19 27
3526 I3: 4 12 20 28 5 13 21 29
3527 I4: 6 14 22 30 7 15 23 31. */
3530 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3531 unsigned int length,
3533 gimple_stmt_iterator *gsi,
3534 VEC(tree,heap) **result_chain)
3536 tree perm_dest, vect1, vect2, high, low;
3538 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3541 enum tree_code high_code, low_code;
3543 gcc_assert (vect_strided_store_supported (vectype, length));
3545 *result_chain = VEC_copy (tree, heap, dr_chain);
3547 for (i = 0; i < exact_log2 (length); i++)
3549 for (j = 0; j < length/2; j++)
3551 vect1 = VEC_index (tree, dr_chain, j);
3552 vect2 = VEC_index (tree, dr_chain, j+length/2);
3554 /* Create interleaving stmt:
3555 in the case of big endian:
3556 high = interleave_high (vect1, vect2)
3557 and in the case of little endian:
3558 high = interleave_low (vect1, vect2). */
3559 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3560 DECL_GIMPLE_REG_P (perm_dest) = 1;
3561 add_referenced_var (perm_dest);
3562 if (BYTES_BIG_ENDIAN)
3564 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3565 low_code = VEC_INTERLEAVE_LOW_EXPR;
3569 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3570 high_code = VEC_INTERLEAVE_LOW_EXPR;
3572 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3574 high = make_ssa_name (perm_dest, perm_stmt);
3575 gimple_assign_set_lhs (perm_stmt, high);
3576 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3577 VEC_replace (tree, *result_chain, 2*j, high);
3579 /* Create interleaving stmt:
3580 in the case of big endian:
3581 low = interleave_low (vect1, vect2)
3582 and in the case of little endian:
3583 low = interleave_high (vect1, vect2). */
3584 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3585 DECL_GIMPLE_REG_P (perm_dest) = 1;
3586 add_referenced_var (perm_dest);
3587 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3589 low = make_ssa_name (perm_dest, perm_stmt);
3590 gimple_assign_set_lhs (perm_stmt, low);
3591 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3592 VEC_replace (tree, *result_chain, 2*j+1, low);
3594 dr_chain = VEC_copy (tree, heap, *result_chain);
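/* Illustrative sketch, not part of GCC (names hypothetical): the whole
   permutation above run on scalar arrays, with the endian selection
   glossed over.  Here "high" draws alternately from the first halves of
   its two inputs and "low" from the second halves, as in the worked
   example; feeding the input 0..31 through log2(4) = 2 stages reproduces
   the documented output sequence.  */

#define EXAMPLE_NELT 8   /* elements per vector in the example */

static void
example_interleave (const int *v1, const int *v2, int *high, int *low)
{
  int k;
  for (k = 0; k < EXAMPLE_NELT / 2; k++)
    {
      high[2 * k] = v1[k];
      high[2 * k + 1] = v2[k];
      low[2 * k] = v1[EXAMPLE_NELT / 2 + k];
      low[2 * k + 1] = v2[EXAMPLE_NELT / 2 + k];
    }
}

static void
example_permute_store_chain (int chain[4][EXAMPLE_NELT])
{
  int result[4][EXAMPLE_NELT];
  int stage, j, k;

  for (stage = 0; stage < 2; stage++)      /* log2 (LENGTH) stages */
    {
      for (j = 0; j < 2; j++)              /* LENGTH / 2 pairs per stage */
        example_interleave (chain[j], chain[j + 2],
                            result[2 * j], result[2 * j + 1]);
      for (j = 0; j < 4; j++)              /* copy back, as VEC_copy above */
        for (k = 0; k < EXAMPLE_NELT; k++)
          chain[j][k] = result[j][k];
    }
}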
3598 /* Function vect_setup_realignment
3600 This function is called when vectorizing an unaligned load using
3601 the dr_explicit_realign[_optimized] scheme.
3602 This function generates the following code at the loop prolog:
3605 x msq_init = *(floor(p)); # prolog load
3606 realignment_token = call target_builtin;
3608 x msq = phi (msq_init, ---)
3610 The stmts marked with x are generated only for the case of
3611 dr_explicit_realign_optimized.
3613 The code above sets up a new (vector) pointer, pointing to the first
3614 location accessed by STMT, and a "floor-aligned" load using that pointer.
3615 It also generates code to compute the "realignment-token" (if the relevant
3616 target hook was defined), and creates a phi-node at the loop-header bb
3617 whose arguments are the result of the prolog-load (created by this
3618 function) and the result of a load that takes place in the loop (to be
3619 created by the caller to this function).
3621 For the case of dr_explicit_realign_optimized:
3622 The caller to this function uses the phi-result (msq) to create the
3623 realignment code inside the loop, and sets up the missing phi argument,
3626 msq = phi (msq_init, lsq)
3627 lsq = *(floor(p')); # load in loop
3628 result = realign_load (msq, lsq, realignment_token);
3630 For the case of dr_explicit_realign:
3632 msq = *(floor(p)); # load in loop
3634 lsq = *(floor(p')); # load in loop
3635 result = realign_load (msq, lsq, realignment_token);
3638 STMT - (scalar) load stmt to be vectorized. This load accesses
3639 a memory location that may be unaligned.
3640 BSI - place where new code is to be inserted.
3641 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3645 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3646 target hook, if defined.
3647 Return value - the result of the loop-header phi node. */
3650 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3651 tree *realignment_token,
3652 enum dr_alignment_support alignment_support_scheme,
3654 struct loop **at_loop)
3656 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3657 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3658 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3659 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3660 struct loop *loop = NULL;
3662 tree scalar_dest = gimple_assign_lhs (stmt);
3669 tree msq_init = NULL_TREE;
3672 tree msq = NULL_TREE;
3673 gimple_seq stmts = NULL;
3675 bool compute_in_loop = false;
3676 bool nested_in_vect_loop = false;
3677 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3678 struct loop *loop_for_initial_load = NULL;
3682 loop = LOOP_VINFO_LOOP (loop_vinfo);
3683 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3686 gcc_assert (alignment_support_scheme == dr_explicit_realign
3687 || alignment_support_scheme == dr_explicit_realign_optimized);
3689 /* We need to generate three things:
3690 1. the misalignment computation
3691 2. the extra vector load (for the optimized realignment scheme).
3692 3. the phi node for the two vectors from which the realignment is
3693 done (for the optimized realignment scheme). */
3695 /* 1. Determine where to generate the misalignment computation.
3697 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3698 calculation will be generated by this function, outside the loop (in the
3699 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3700 caller, inside the loop.
3702 Background: If the misalignment remains fixed throughout the iterations of
3703 the loop, then both realignment schemes are applicable, and also the
3704 misalignment computation can be done outside LOOP. This is because we are
3705 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3706 are a multiple of VS (the Vector Size), and therefore the misalignment in
3707 different vectorized LOOP iterations is always the same.
3708 The problem arises only if the memory access is in an inner-loop nested
3709 inside LOOP, which is now being vectorized using outer-loop vectorization.
3710 This is the only case when the misalignment of the memory access may not
3711 remain fixed throughout the iterations of the inner-loop (as explained in
3712 detail in vect_supportable_dr_alignment). In this case, not only is the
3713 optimized realignment scheme not applicable, but also the misalignment
3714 computation (and generation of the realignment token that is passed to
3715 REALIGN_LOAD) has to be done inside the loop.
3717 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3718 or not, which in turn determines if the misalignment is computed inside
3719 the inner-loop, or outside LOOP. */
3721 if (init_addr != NULL_TREE || !loop_vinfo)
3723 compute_in_loop = true;
3724 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3728 /* 2. Determine where to generate the extra vector load.
3730 For the optimized realignment scheme, instead of generating two vector
3731 loads in each iteration, we generate a single extra vector load in the
3732 preheader of the loop, and in each iteration reuse the result of the
3733 vector load from the previous iteration. In case the memory access is in
3734 an inner-loop nested inside LOOP, which is now being vectorized using
3735 outer-loop vectorization, we need to determine whether this initial vector
3736 load should be generated at the preheader of the inner-loop, or can be
3737 generated at the preheader of LOOP. If the memory access has no evolution
3738 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3739 to be generated inside LOOP (in the preheader of the inner-loop). */
3741 if (nested_in_vect_loop)
3743 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3744 bool invariant_in_outerloop =
3745 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3746 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3749 loop_for_initial_load = loop;
3751 *at_loop = loop_for_initial_load;
3753 if (loop_for_initial_load)
3754 pe = loop_preheader_edge (loop_for_initial_load);
3756 /* 3. For the case of the optimized realignment, create the first vector
3757 load at the loop preheader. */
3759 if (alignment_support_scheme == dr_explicit_realign_optimized)
3761 /* Create msq_init = *(floor(p1)) in the loop preheader */
3763 gcc_assert (!compute_in_loop);
3764 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3765 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3766 NULL_TREE, &init_addr, NULL, &inc,
3768 new_stmt = gimple_build_assign_with_ops
3769 (BIT_AND_EXPR, NULL_TREE, ptr,
3770 build_int_cst (TREE_TYPE (ptr),
3771 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3772 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3773 gimple_assign_set_lhs (new_stmt, new_temp);
3774 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3775 gcc_assert (!new_bb);
3777 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3778 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3779 new_stmt = gimple_build_assign (vec_dest, data_ref);
3780 new_temp = make_ssa_name (vec_dest, new_stmt);
3781 gimple_assign_set_lhs (new_stmt, new_temp);
3782 mark_symbols_for_renaming (new_stmt);
3785 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3786 gcc_assert (!new_bb);
3789 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3791 msq_init = gimple_assign_lhs (new_stmt);
3794 /* 4. Create realignment token using a target builtin, if available.
3795 It is done either inside the containing loop, or before LOOP (as
3796 determined above). */
3798 if (targetm.vectorize.builtin_mask_for_load)
3802 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3805 /* Generate the INIT_ADDR computation outside LOOP. */
3806 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3810 pe = loop_preheader_edge (loop);
3811 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3812 gcc_assert (!new_bb);
3815 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3818 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3819 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3821 vect_create_destination_var (scalar_dest,
3822 gimple_call_return_type (new_stmt));
3823 new_temp = make_ssa_name (vec_dest, new_stmt);
3824 gimple_call_set_lhs (new_stmt, new_temp);
3826 if (compute_in_loop)
3827 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3830 /* Generate the misalignment computation outside LOOP. */
3831 pe = loop_preheader_edge (loop);
3832 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3833 gcc_assert (!new_bb);
3836 *realignment_token = gimple_call_lhs (new_stmt);
3838 /* The result of the CALL_EXPR to this builtin is determined from
3839 the value of the parameter and no global variables are touched
3840 which makes the builtin a "const" function. Requiring the
3841 builtin to have the "const" attribute makes it unnecessary
3842 to call mark_call_clobbered. */
3843 gcc_assert (TREE_READONLY (builtin_decl));
3846 if (alignment_support_scheme == dr_explicit_realign)
3847 return msq;
3849 gcc_assert (!compute_in_loop);
3850 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3853 /* 5. Create msq = phi <msq_init, lsq> in loop */
3855 pe = loop_preheader_edge (containing_loop);
3856 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3857 msq = make_ssa_name (vec_dest, NULL);
3858 phi_stmt = create_phi_node (msq, containing_loop->header);
3859 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3860 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
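/* Illustrative sketch, not part of GCC (names hypothetical; the real
   selection is performed by the target using the realignment token):
   the effect of realign_load on byte arrays.  MSQ and LSQ are the two
   aligned VS-byte loads that straddle the unaligned address; the result
   is the VS consecutive bytes starting at the misalignment offset.  */

static void
example_realign_load (const unsigned char *msq, const unsigned char *lsq,
                      unsigned misalign, unsigned vs, unsigned char *result)
{
  unsigned k;
  for (k = 0; k < vs; k++)
    result[k] = (k + misalign < vs
                 ? msq[k + misalign]        /* tail of the first load */
                 : lsq[k + misalign - vs]); /* head of the second load */
}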
3866 /* Function vect_strided_load_supported.
3868 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3869 and FALSE otherwise. */
3872 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3874 optab perm_even_optab, perm_odd_optab;
3875 enum machine_mode mode;
3877 mode = TYPE_MODE (vectype);
3879 /* vect_permute_load_chain requires the group size to be a power of two. */
3880 if (exact_log2 (count) == -1)
3882 if (vect_print_dump_info (REPORT_DETAILS))
3883 fprintf (vect_dump, "the size of the group of strided accesses"
3884 " is not a power of 2");
3888 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3890 if (!perm_even_optab)
3892 if (vect_print_dump_info (REPORT_DETAILS))
3893 fprintf (vect_dump, "no optab for perm_even.");
3897 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3899 if (vect_print_dump_info (REPORT_DETAILS))
3900 fprintf (vect_dump, "perm_even op not supported by target.");
3904 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3906 if (!perm_odd_optab)
3908 if (vect_print_dump_info (REPORT_DETAILS))
3909 fprintf (vect_dump, "no optab for perm_odd.");
3913 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3915 if (vect_print_dump_info (REPORT_DETAILS))
3916 fprintf (vect_dump, "perm_odd op not supported by target.");

/* Return TRUE if vec_load_lanes is available for COUNT vectors of
   type VECTYPE.  */

bool
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  return vect_lanes_optab_supported_p ("vec_load_lanes",
                                       vec_load_lanes_optab,
                                       vectype, count);
}
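
/* For example (a sketch of the intent only; the actual mapping is the
   target's choice via the optab): on a target with NEON-style structure
   loads, COUNT == 3 with VECTYPE V8QI could be implemented by an
   instruction such as vld3.8, which loads 3 x 8 consecutive bytes and
   deinterleaves them into three vector registers in a single operation,
   avoiding the extract_even/odd permutations used below.  Note that,
   unlike vect_strided_load_supported, COUNT need not be a power of 2
   here.  */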

/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors, each containing 8 elements.  We assign a number
   to each element; the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:   0  4  8 12 16 20 24 28
   2nd vec:   1  5  9 13 17 21 25 29
   3rd vec:   2  6 10 14 18 22 26 30
   4th vec:   3  7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors

   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output
   of extract_even will be:  0 2 4 6
   and of extract_odd:       1 3 5 7

   The permutation is done in log LENGTH stages.  In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:  0  2  4  6  8 10 12 14
   E2:  1  3  5  7  9 11 13 15
   E3: 16 18 20 22 24 26 28 30
   E4: 17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of the extract_even operation and then
   the output of extract_odd in RESULT_CHAIN (which is then copied to
   DR_CHAIN).  The input for the second stage is:

   1st vec (E1):  0  2  4  6  8 10 12 14
   2nd vec (E3): 16 18 20 22 24 26 28 30
   3rd vec (E2):  1  3  5  7  9 11 13 15
   4th vec (E4): 17 19 21 23 25 27 29 31

   The output of the second stage:

   E1:  0  4  8 12 16 20 24 28
   E2:  2  6 10 14 18 22 26 30
   E3:  1  5  9 13 17 21 25 29
   E4:  3  7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1):  0  4  8 12 16 20 24 28
   2nd vec (E3):  1  5  9 13 17 21 25 29
   3rd vec (E2):  2  6 10 14 18 22 26 30
   4th vec (E4):  3  7 11 15 19 23 27 31.  */

static void
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
                         unsigned int length,
                         gimple stmt,
                         gimple_stmt_iterator *gsi,
                         VEC(tree,heap) **result_chain)
{
  tree perm_dest, data_ref, first_vect, second_vect;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  int i;
  unsigned int j;

  gcc_assert (vect_strided_load_supported (vectype, length));

  *result_chain = VEC_copy (tree, heap, dr_chain);
  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = VEC_index (tree, dr_chain, j);
          second_vect = VEC_index (tree, dr_chain, j + 1);

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j / 2, data_ref);

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          VEC_replace (tree, *result_chain, j / 2 + length / 2, data_ref);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }
}
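
/* A scalar sketch of one extract_even/odd step, for illustration only
   (A and B are the two N-element input vectors of a pair; EVEN and ODD
   are the two output vectors):

     for (k = 0; k < N / 2; k++)
       {
         even[k] = A[2 * k];
         even[N / 2 + k] = B[2 * k];
         odd[k] = A[2 * k + 1];
         odd[N / 2 + k] = B[2 * k + 1];
       }

   Applying this to every pair in DR_CHAIN, with all EVEN results placed
   before all ODD results, performs one of the log2 (LENGTH) stages
   described in the comment above.  */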

/* Function vect_transform_strided_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build
   statements to perform their permutation and ascribe the result
   vectorized statements to the scalar statements.  */

void
vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
                             gimple_stmt_iterator *gsi)
{
  VEC(tree,heap) *result_chain = NULL;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
     permuted vectors, which are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);
  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
  vect_record_strided_load_vectors (stmt, result_chain);
  VEC_free (tree, heap, result_chain);
}
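
/* For example (a sketch; the scalar names are illustrative): for an
   interleaving group of two, such as

     x = a[2*i];  y = a[2*i + 1];

   SIZE is 2, DR_CHAIN holds the two wide contiguous vector loads, and
   after the permutation the first result vector supplies the vectorized
   uses of X and the second supplies those of Y.  */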

/* RESULT_CHAIN contains the output of a group of strided loads that were
   generated as part of the vectorization of STMT.  Assign the statement
   for each vector to the associated scalar statement.  */

void
vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  gimple next_stmt, new_stmt;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by the
         dead code elimination pass later.  No need to check for the first
         stmt in the group, since it always exists.
         GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (next_stmt != first_stmt
          && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of
             multiple copies, and we put the new vector statement in the
             first available RELATED_STMT field.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
                {
                  gimple prev_stmt =
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
                  gimple rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
                  while (rel_stmt)
                    {
                      prev_stmt = rel_stmt;
                      rel_stmt =
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                    }

                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
                    new_stmt;
                }
            }

          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }
}

/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on ALIGNMENT bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  if (DECL_EXTERNAL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}
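
/* For instance (a sketch; the declaration is illustrative): for a
   file-scope

     static float a[256];

   that has not yet been written out (!TREE_ASM_WRITTEN), the vectorizer
   may raise DECL_ALIGN up to MAX_OFILE_ALIGNMENT bits, whereas an extern
   declaration must keep whatever alignment its defining translation unit
   chose, so we return false for it.  */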

/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
                               bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout
     the execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same
     throughout the execution of the inner-loop.  This is because the
     inner-loop advances with the original scalar step (and not in steps of
     VS).  If the inner-loop step happens to be a multiple of VS, then the
     misalignment remains fixed and we can use the optimized realignment
     scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      }  */

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if ((nested_in_vect_loop
               && (TREE_INT_CST_LOW (DR_STEP (dr))
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
              || !loop_vinfo)
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }
      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
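
/* For example (a sketch of the decision above): on a target that provides
   vec_realign_load but cannot perform misaligned vector loads directly, an
   unaligned read inside a vectorized loop whose step equals the vector
   size gets dr_explicit_realign_optimized, while the same read in a
   basic-block (SLP) context, where there is no containing loop in which to
   amortize the realignment token (!loop_vinfo above), gets
   dr_explicit_realign.  */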