1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "tree-pretty-print.h"
33 #include "gimple-pretty-print.h"
34 #include "tree-flow.h"
35 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "diagnostic-core.h"
42 /* Need to include rtl.h, expr.h, etc. for optabs. */
46 /* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
50 vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
53 enum machine_mode mode, array_mode;
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
61 if (array_mode == BLKmode)
63 if (vect_print_dump_info (REPORT_DETAILS))
64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
71 if (vect_print_dump_info (REPORT_DETAILS))
72 fprintf (vect_dump, "cannot use %s<%s><%s>",
73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
77 if (vect_print_dump_info (REPORT_DETAILS))
78 fprintf (vect_dump, "can use %s<%s><%s>",
79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
85 /* Return the smallest scalar part of STMT.
86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
88 result-type is different than the type of the arguments (e.g., demotion,
89 promotion), vectype will be reset appropriately (later). Note that we have
90 to visit the smallest datatype in this function, because that determines the
91 VF. If the smallest datatype in the loop is present only as the rhs of a
92 promotion operation - we'd miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant: e.g.:
95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
103 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104 HOST_WIDE_INT *rhs_size_unit)
106 tree scalar_type = gimple_expr_type (stmt);
107 HOST_WIDE_INT lhs, rhs;
109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
111 if (is_gimple_assign (stmt)
112 && (gimple_assign_cast_p (stmt)
113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
114 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
120 scalar_type = rhs_type;
123 *lhs_size_unit = lhs;
124 *rhs_size_unit = rhs;
129 /* Find the place of the data-ref in STMT in the interleaving chain that starts
130 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
133 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135 gimple next_stmt = first_stmt;
138 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
141 while (next_stmt && next_stmt != stmt)
144 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
154 /* Function vect_insert_into_interleaving_chain.
156 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
159 vect_insert_into_interleaving_chain (struct data_reference *dra,
160 struct data_reference *drb)
164 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
165 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
168 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
171 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
172 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
175 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
176 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
180 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
183 /* We got to the end of the list. Insert here. */
184 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
185 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
189 /* Function vect_update_interleaving_chain.
191 For two data-refs DRA and DRB that are a part of a chain interleaved data
192 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
194 There are four possible cases:
195 1. New stmts - both DRA and DRB are not a part of any chain:
198 2. DRB is a part of a chain and DRA is not:
199 no need to update FIRST_DR
200 no need to insert DRB
201 insert DRA according to init
202 3. DRA is a part of a chain and DRB is not:
203 if (init of FIRST_DR > init of DRB)
205 NEXT(FIRST_DR) = previous FIRST_DR
207 insert DRB according to its init
208 4. both DRA and DRB are in some interleaving chains:
209 choose the chain with the smallest init of FIRST_DR
210 insert the nodes of the second chain into the first one. */
213 vect_update_interleaving_chain (struct data_reference *drb,
214 struct data_reference *dra)
216 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
217 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
218 tree next_init, init_dra_chain, init_drb_chain;
219 gimple first_a, first_b;
221 gimple node, prev, next, first_stmt;
223 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
224 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
226 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
227 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
228 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
232 /* 2. DRB is a part of a chain and DRA is not. */
233 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
235 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
236 /* Insert DRA into the chain of DRB. */
237 vect_insert_into_interleaving_chain (dra, drb);
241 /* 3. DRA is a part of a chain and DRB is not. */
242 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
244 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
245 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
249 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 /* DRB's init is smaller than the init of the stmt previously marked
252 as the first stmt of the interleaving chain of DRA. Therefore, we
253 update FIRST_STMT and put DRB in the head of the list. */
254 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
255 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
257 /* Update all the stmts in the list to point to the new FIRST_STMT. */
258 tmp = old_first_stmt;
261 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
262 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
267 /* Insert DRB in the list of DRA. */
268 vect_insert_into_interleaving_chain (drb, dra);
269 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
274 /* 4. both DRA and DRB are in some interleaving chains. */
275 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
276 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
277 if (first_a == first_b)
279 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
280 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 /* Insert the nodes of DRA chain into the DRB chain.
285 After inserting a node, continue from this node of the DRB chain (don't
286 start from the beginning. */
287 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
288 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
289 first_stmt = first_b;
293 /* Insert the nodes of DRB chain into the DRA chain.
294 After inserting a node, continue from this node of the DRA chain (don't
295 start from the beginning. */
296 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
297 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
298 first_stmt = first_a;
303 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
304 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
307 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
308 if (tree_int_cst_compare (next_init, node_init) > 0)
311 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
317 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
321 /* We got to the end of the list. Insert here. */
322 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
326 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
327 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
331 /* Check dependence between DRA and DRB for basic block vectorization.
332 If the accesses share same bases and offsets, we can compare their initial
333 constant offsets to decide whether they differ or not. In case of a read-
334 write dependence we check that the load is before the store to ensure that
335 vectorization will not change the order of the accesses. */
338 vect_drs_dependent_in_basic_block (struct data_reference *dra,
339 struct data_reference *drb)
341 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
344 /* We only call this function for pairs of loads and stores, but we verify
346 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 if (DR_IS_READ (dra))
354 /* Check that the data-refs have same bases and offsets. If not, we can't
355 determine if they are dependent. */
356 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
357 || !dr_equal_offsets_p (dra, drb))
360 /* Check the types. */
361 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
362 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364 if (type_size_a != type_size_b
365 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
366 TREE_TYPE (DR_REF (drb))))
369 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
370 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372 /* Two different locations - no dependence. */
373 if (init_a != init_b)
376 /* We have a read-write dependence. Check that the load is before the store.
377 When we vectorize basic blocks, vector load can be only before
378 corresponding scalar load, and vector store can be only after its
379 corresponding scalar store. So the order of the acceses is preserved in
380 case the load is before the store. */
381 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
382 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
389 /* Function vect_check_interleaving.
391 Check if DRA and DRB are a part of interleaving. In case they are, insert
392 DRA and DRB in an interleaving chain. */
395 vect_check_interleaving (struct data_reference *dra,
396 struct data_reference *drb)
398 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400 /* Check that the data-refs have same first location (except init) and they
401 are both either store or load (not load and store). */
402 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
403 || !dr_equal_offsets_p (dra, drb)
404 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
405 || DR_IS_READ (dra) != DR_IS_READ (drb))
409 1. data-refs are of the same type
410 2. their steps are equal
411 3. the step (if greater than zero) is greater than the difference between
413 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
414 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416 if (type_size_a != type_size_b
417 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
418 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
419 TREE_TYPE (DR_REF (drb))))
422 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
423 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
424 step = TREE_INT_CST_LOW (DR_STEP (dra));
428 /* If init_a == init_b + the size of the type * k, we have an interleaving,
429 and DRB is accessed before DRA. */
430 diff_mod_size = (init_a - init_b) % type_size_a;
432 if (step && (init_a - init_b) > step)
435 if (diff_mod_size == 0)
437 vect_update_interleaving_chain (drb, dra);
438 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 fprintf (vect_dump, "Detected interleaving ");
441 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
442 fprintf (vect_dump, " and ");
443 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
450 /* If init_b == init_a + the size of the type * k, we have an
451 interleaving, and DRA is accessed before DRB. */
452 diff_mod_size = (init_b - init_a) % type_size_a;
454 if (step && (init_b - init_a) > step)
457 if (diff_mod_size == 0)
459 vect_update_interleaving_chain (dra, drb);
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 fprintf (vect_dump, "Detected interleaving ");
463 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
474 /* Check if data references pointed by DR_I and DR_J are same or
475 belong to same interleaving group. Return FALSE if drs are
476 different, otherwise return TRUE. */
479 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481 gimple stmt_i = DR_STMT (dr_i);
482 gimple stmt_j = DR_STMT (dr_j);
484 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
485 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
486 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
487 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
488 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
494 /* If address ranges represented by DDR_I and DDR_J are equal,
495 return TRUE, otherwise return FALSE. */
498 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
501 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
502 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
503 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
509 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
510 tested at run-time. Return TRUE if DDR was successfully inserted.
511 Return false if versioning is not supported. */
514 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
521 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 fprintf (vect_dump, "mark for run-time aliasing test between ");
524 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
525 fprintf (vect_dump, " and ");
526 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
529 if (optimize_loop_nest_for_size_p (loop))
531 if (vect_print_dump_info (REPORT_DR_DETAILS))
532 fprintf (vect_dump, "versioning not supported when optimizing for size.");
536 /* FORNOW: We don't support versioning with outer-loop vectorization. */
539 if (vect_print_dump_info (REPORT_DR_DETAILS))
540 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
544 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
549 /* Function vect_analyze_data_ref_dependence.
551 Return TRUE if there (might) exist a dependence between a memory-reference
552 DRA and a memory-reference DRB. When versioning for alias may check a
553 dependence at run-time, return FALSE. Adjust *MAX_VF according to
554 the data dependence. */
557 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
558 loop_vec_info loop_vinfo, int *max_vf,
559 bool *data_dependence_in_bb)
562 struct loop *loop = NULL;
563 struct data_reference *dra = DDR_A (ddr);
564 struct data_reference *drb = DDR_B (ddr);
565 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
566 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
567 lambda_vector dist_v;
568 unsigned int loop_depth;
570 /* Don't bother to analyze statements marked as unvectorizable. */
571 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
572 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
575 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
577 /* Independent data accesses. */
578 vect_check_interleaving (dra, drb);
583 loop = LOOP_VINFO_LOOP (loop_vinfo);
585 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
588 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
594 fprintf (vect_dump, "versioning for alias required: "
595 "can't determine dependence between ");
596 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
597 fprintf (vect_dump, " and ");
598 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
601 /* Add to list of ddrs that need to be tested at run-time. */
602 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
605 /* When vectorizing a basic block unknown depnedence can still mean
607 if (vect_check_interleaving (dra, drb))
610 /* Read-read is OK (we need this check here, after checking for
612 if (DR_IS_READ (dra) && DR_IS_READ (drb))
615 if (vect_print_dump_info (REPORT_DR_DETAILS))
617 fprintf (vect_dump, "can't determine dependence between ");
618 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
619 fprintf (vect_dump, " and ");
620 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
623 /* We do not vectorize basic blocks with write-write dependencies. */
624 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
627 /* We deal with read-write dependencies in basic blocks later (by
628 verifying that all the loads in the basic block are before all the
630 *data_dependence_in_bb = true;
634 /* Versioning for alias is not yet supported for basic block SLP, and
635 dependence distance is unapplicable, hence, in case of known data
636 dependence, basic block vectorization is impossible for now. */
639 if (dra != drb && vect_check_interleaving (dra, drb))
642 if (vect_print_dump_info (REPORT_DR_DETAILS))
644 fprintf (vect_dump, "determined dependence between ");
645 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
646 fprintf (vect_dump, " and ");
647 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
650 /* Do not vectorize basic blcoks with write-write dependences. */
651 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
654 /* Check if this dependence is allowed in basic block vectorization. */
655 return vect_drs_dependent_in_basic_block (dra, drb);
658 /* Loop-based vectorization and known data dependence. */
659 if (DDR_NUM_DIST_VECTS (ddr) == 0)
661 if (vect_print_dump_info (REPORT_DR_DETAILS))
663 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
664 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
665 fprintf (vect_dump, " and ");
666 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
668 /* Add to list of ddrs that need to be tested at run-time. */
669 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
672 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
673 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
675 int dist = dist_v[loop_depth];
677 if (vect_print_dump_info (REPORT_DR_DETAILS))
678 fprintf (vect_dump, "dependence distance = %d.", dist);
682 if (vect_print_dump_info (REPORT_DR_DETAILS))
684 fprintf (vect_dump, "dependence distance == 0 between ");
685 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
686 fprintf (vect_dump, " and ");
687 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
690 /* For interleaving, mark that there is a read-write dependency if
691 necessary. We check before that one of the data-refs is store. */
692 if (DR_IS_READ (dra))
693 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
696 if (DR_IS_READ (drb))
697 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
703 if (dist > 0 && DDR_REVERSED_P (ddr))
705 /* If DDR_REVERSED_P the order of the data-refs in DDR was
706 reversed (to make distance vector positive), and the actual
707 distance is negative. */
708 if (vect_print_dump_info (REPORT_DR_DETAILS))
709 fprintf (vect_dump, "dependence distance negative.");
714 && abs (dist) < *max_vf)
716 /* The dependence distance requires reduction of the maximal
717 vectorization factor. */
718 *max_vf = abs (dist);
719 if (vect_print_dump_info (REPORT_DR_DETAILS))
720 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
724 if (abs (dist) >= *max_vf)
726 /* Dependence distance does not create dependence, as far as
727 vectorization is concerned, in this case. */
728 if (vect_print_dump_info (REPORT_DR_DETAILS))
729 fprintf (vect_dump, "dependence distance >= VF.");
733 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
735 fprintf (vect_dump, "not vectorized, possible dependence "
736 "between data-refs ");
737 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
738 fprintf (vect_dump, " and ");
739 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
748 /* Function vect_analyze_data_ref_dependences.
750 Examine all the data references in the loop, and make sure there do not
751 exist any data dependences between them. Set *MAX_VF according to
752 the maximum vectorization factor the data dependences allow. */
755 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
756 bb_vec_info bb_vinfo, int *max_vf,
757 bool *data_dependence_in_bb)
760 VEC (ddr_p, heap) *ddrs = NULL;
761 struct data_dependence_relation *ddr;
763 if (vect_print_dump_info (REPORT_DETAILS))
764 fprintf (vect_dump, "=== vect_analyze_dependences ===");
767 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
769 ddrs = BB_VINFO_DDRS (bb_vinfo);
771 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
772 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
773 data_dependence_in_bb))
780 /* Function vect_compute_data_ref_alignment
782 Compute the misalignment of the data reference DR.
785 1. If during the misalignment computation it is found that the data reference
786 cannot be vectorized then false is returned.
787 2. DR_MISALIGNMENT (DR) is defined.
789 FOR NOW: No analysis is actually performed. Misalignment is calculated
790 only for trivial cases. TODO. */
793 vect_compute_data_ref_alignment (struct data_reference *dr)
795 gimple stmt = DR_STMT (dr);
796 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
797 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
798 struct loop *loop = NULL;
799 tree ref = DR_REF (dr);
801 tree base, base_addr;
804 tree aligned_to, alignment;
806 if (vect_print_dump_info (REPORT_DETAILS))
807 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
810 loop = LOOP_VINFO_LOOP (loop_vinfo);
812 /* Initialize misalignment to unknown. */
813 SET_DR_MISALIGNMENT (dr, -1);
815 misalign = DR_INIT (dr);
816 aligned_to = DR_ALIGNED_TO (dr);
817 base_addr = DR_BASE_ADDRESS (dr);
818 vectype = STMT_VINFO_VECTYPE (stmt_info);
820 /* In case the dataref is in an inner-loop of the loop that is being
821 vectorized (LOOP), we use the base and misalignment information
822 relative to the outer-loop (LOOP). This is ok only if the misalignment
823 stays the same throughout the execution of the inner-loop, which is why
824 we have to check that the stride of the dataref in the inner-loop evenly
825 divides by the vector size. */
826 if (loop && nested_in_vect_loop_p (loop, stmt))
828 tree step = DR_STEP (dr);
829 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
831 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
833 if (vect_print_dump_info (REPORT_ALIGNMENT))
834 fprintf (vect_dump, "inner step divides the vector-size.");
835 misalign = STMT_VINFO_DR_INIT (stmt_info);
836 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
837 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
841 if (vect_print_dump_info (REPORT_ALIGNMENT))
842 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
843 misalign = NULL_TREE;
847 base = build_fold_indirect_ref (base_addr);
848 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
850 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
853 if (vect_print_dump_info (REPORT_ALIGNMENT))
855 fprintf (vect_dump, "Unknown alignment for access: ");
856 print_generic_expr (vect_dump, base, TDF_SLIM);
862 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
864 || (TREE_CODE (base_addr) == SSA_NAME
865 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
866 TREE_TYPE (base_addr)))),
868 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
871 base_aligned = false;
875 /* Do not change the alignment of global variables if
876 flag_section_anchors is enabled. */
877 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
878 || (TREE_STATIC (base) && flag_section_anchors))
880 if (vect_print_dump_info (REPORT_DETAILS))
882 fprintf (vect_dump, "can't force alignment of ref: ");
883 print_generic_expr (vect_dump, ref, TDF_SLIM);
888 /* Force the alignment of the decl.
889 NOTE: This is the only change to the code we make during
890 the analysis phase, before deciding to vectorize the loop. */
891 if (vect_print_dump_info (REPORT_DETAILS))
893 fprintf (vect_dump, "force alignment of ");
894 print_generic_expr (vect_dump, ref, TDF_SLIM);
897 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
898 DECL_USER_ALIGN (base) = 1;
901 /* At this point we assume that the base is aligned. */
902 gcc_assert (base_aligned
903 || (TREE_CODE (base) == VAR_DECL
904 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
906 /* If this is a backward running DR then first access in the larger
907 vectype actually is N-1 elements before the address in the DR.
908 Adjust misalign accordingly. */
909 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
911 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
912 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
913 otherwise we wouldn't be here. */
914 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
915 /* PLUS because DR_STEP was negative. */
916 misalign = size_binop (PLUS_EXPR, misalign, offset);
919 /* Modulo alignment. */
920 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
922 if (!host_integerp (misalign, 1))
924 /* Negative or overflowed misalignment value. */
925 if (vect_print_dump_info (REPORT_DETAILS))
926 fprintf (vect_dump, "unexpected misalign value");
930 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
932 if (vect_print_dump_info (REPORT_DETAILS))
934 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
935 print_generic_expr (vect_dump, ref, TDF_SLIM);
942 /* Function vect_compute_data_refs_alignment
944 Compute the misalignment of data references in the loop.
945 Return FALSE if a data reference is found that cannot be vectorized. */
948 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
949 bb_vec_info bb_vinfo)
951 VEC (data_reference_p, heap) *datarefs;
952 struct data_reference *dr;
956 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
958 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
960 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
961 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
962 && !vect_compute_data_ref_alignment (dr))
966 /* Mark unsupported statement as unvectorizable. */
967 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
978 /* Function vect_update_misalignment_for_peel
980 DR - the data reference whose misalignment is to be adjusted.
981 DR_PEEL - the data reference whose misalignment is being made
982 zero in the vector loop by the peel.
983 NPEEL - the number of iterations in the peel loop if the misalignment
984 of DR_PEEL is known at compile time. */
987 vect_update_misalignment_for_peel (struct data_reference *dr,
988 struct data_reference *dr_peel, int npeel)
991 VEC(dr_p,heap) *same_align_drs;
992 struct data_reference *current_dr;
993 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
994 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
995 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
996 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
998 /* For interleaved data accesses the step in the loop must be multiplied by
999 the size of the interleaving group. */
1000 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1001 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
1002 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
1003 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
1005 /* It can be assumed that the data refs with the same alignment as dr_peel
1006 are aligned in the vector loop. */
1008 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
1009 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
1011 if (current_dr != dr)
1013 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1014 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1015 SET_DR_MISALIGNMENT (dr, 0);
1019 if (known_alignment_for_access_p (dr)
1020 && known_alignment_for_access_p (dr_peel))
1022 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
1023 int misal = DR_MISALIGNMENT (dr);
1024 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1025 misal += negative ? -npeel * dr_size : npeel * dr_size;
1026 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1027 SET_DR_MISALIGNMENT (dr, misal);
1031 if (vect_print_dump_info (REPORT_DETAILS))
1032 fprintf (vect_dump, "Setting misalignment to -1.");
1033 SET_DR_MISALIGNMENT (dr, -1);
1037 /* Function vect_verify_datarefs_alignment
1039 Return TRUE if all data references in the loop can be
1040 handled with respect to alignment. */
1043 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1045 VEC (data_reference_p, heap) *datarefs;
1046 struct data_reference *dr;
1047 enum dr_alignment_support supportable_dr_alignment;
1051 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1053 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1055 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1057 gimple stmt = DR_STMT (dr);
1058 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1060 /* For interleaving, only the alignment of the first access matters.
1061 Skip statements marked as not vectorizable. */
1062 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
1063 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1064 || !STMT_VINFO_VECTORIZABLE (stmt_info))
1067 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1068 if (!supportable_dr_alignment)
1070 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1072 if (DR_IS_READ (dr))
1074 "not vectorized: unsupported unaligned load.");
1077 "not vectorized: unsupported unaligned store.");
1079 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1083 if (supportable_dr_alignment != dr_aligned
1084 && vect_print_dump_info (REPORT_ALIGNMENT))
1085 fprintf (vect_dump, "Vectorizing an unaligned access.");
1091 /* Function vector_alignment_reachable_p
1093 Return true if vector alignment for DR is reachable by peeling
1094 a few loop iterations. Return false otherwise. */
1097 vector_alignment_reachable_p (struct data_reference *dr)
1099 gimple stmt = DR_STMT (dr);
1100 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1101 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1103 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1105 /* For interleaved access we peel only if number of iterations in
1106 the prolog loop ({VF - misalignment}), is a multiple of the
1107 number of the interleaved accesses. */
1108 int elem_size, mis_in_elements;
1109 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1111 /* FORNOW: handle only known alignment. */
1112 if (!known_alignment_for_access_p (dr))
1115 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1116 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1118 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
1122 /* If misalignment is known at the compile time then allow peeling
1123 only if natural alignment is reachable through peeling. */
1124 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1126 HOST_WIDE_INT elmsize =
1127 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1128 if (vect_print_dump_info (REPORT_DETAILS))
1130 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1131 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1133 if (DR_MISALIGNMENT (dr) % elmsize)
1135 if (vect_print_dump_info (REPORT_DETAILS))
1136 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1141 if (!known_alignment_for_access_p (dr))
1143 tree type = (TREE_TYPE (DR_REF (dr)));
1144 tree ba = DR_BASE_OBJECT (dr);
1145 bool is_packed = false;
1148 is_packed = contains_packed_reference (ba);
1150 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1153 if (vect_print_dump_info (REPORT_DETAILS))
1154 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1155 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1165 /* Calculate the cost of the memory access represented by DR. */
1168 vect_get_data_access_cost (struct data_reference *dr,
1169 unsigned int *inside_cost,
1170 unsigned int *outside_cost)
1172 gimple stmt = DR_STMT (dr);
1173 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1174 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1175 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1176 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1177 int ncopies = vf / nunits;
1178 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1180 if (!supportable_dr_alignment)
1181 *inside_cost = VECT_MAX_COST;
1184 if (DR_IS_READ (dr))
1185 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
1187 vect_get_store_cost (dr, ncopies, inside_cost);
1190 if (vect_print_dump_info (REPORT_COST))
1191 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1192 "outside_cost = %d.", *inside_cost, *outside_cost);
1197 vect_peeling_hash (const void *elem)
1199 const struct _vect_peel_info *peel_info;
1201 peel_info = (const struct _vect_peel_info *) elem;
1202 return (hashval_t) peel_info->npeel;
1207 vect_peeling_hash_eq (const void *elem1, const void *elem2)
1209 const struct _vect_peel_info *a, *b;
1211 a = (const struct _vect_peel_info *) elem1;
1212 b = (const struct _vect_peel_info *) elem2;
1213 return (a->npeel == b->npeel);
1217 /* Insert DR into peeling hash table with NPEEL as key. */
1220 vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1223 struct _vect_peel_info elem, *slot;
1225 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1228 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1234 slot = XNEW (struct _vect_peel_info);
1235 slot->npeel = npeel;
1238 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1243 if (!supportable_dr_alignment && !flag_vect_cost_model)
1244 slot->count += VECT_MAX_COST;
1248 /* Traverse peeling hash table to find peeling option that aligns maximum
1249 number of data accesses. */
1252 vect_peeling_hash_get_most_frequent (void **slot, void *data)
1254 vect_peel_info elem = (vect_peel_info) *slot;
1255 vect_peel_extended_info max = (vect_peel_extended_info) data;
1257 if (elem->count > max->peel_info.count
1258 || (elem->count == max->peel_info.count
1259 && max->peel_info.npeel > elem->npeel))
1261 max->peel_info.npeel = elem->npeel;
1262 max->peel_info.count = elem->count;
1263 max->peel_info.dr = elem->dr;
1270 /* Traverse peeling hash table and calculate cost for each peeling option.
1271 Find the one with the lowest cost. */
1274 vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1276 vect_peel_info elem = (vect_peel_info) *slot;
1277 vect_peel_extended_info min = (vect_peel_extended_info) data;
1278 int save_misalignment, dummy;
1279 unsigned int inside_cost = 0, outside_cost = 0, i;
1280 gimple stmt = DR_STMT (elem->dr);
1281 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1282 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1283 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1284 struct data_reference *dr;
1286 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1288 stmt = DR_STMT (dr);
1289 stmt_info = vinfo_for_stmt (stmt);
1290 /* For interleaving, only the alignment of the first access
1292 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1293 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1296 save_misalignment = DR_MISALIGNMENT (dr);
1297 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
1298 vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
1299 SET_DR_MISALIGNMENT (dr, save_misalignment);
1302 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
1303 vect_get_single_scalar_iteraion_cost (loop_vinfo));
1305 if (inside_cost < min->inside_cost
1306 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1308 min->inside_cost = inside_cost;
1309 min->outside_cost = outside_cost;
1310 min->peel_info.dr = elem->dr;
1311 min->peel_info.npeel = elem->npeel;
1318 /* Choose best peeling option by traversing peeling hash table and either
1319 choosing an option with the lowest cost (if cost model is enabled) or the
1320 option that aligns as many accesses as possible. */
1322 static struct data_reference *
1323 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
1324 unsigned int *npeel)
1326 struct _vect_peel_extended_info res;
1328 res.peel_info.dr = NULL;
1330 if (flag_vect_cost_model)
1332 res.inside_cost = INT_MAX;
1333 res.outside_cost = INT_MAX;
1334 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1335 vect_peeling_hash_get_lowest_cost, &res);
1339 res.peel_info.count = 0;
1340 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1341 vect_peeling_hash_get_most_frequent, &res);
1344 *npeel = res.peel_info.npeel;
1345 return res.peel_info.dr;
1349 /* Function vect_enhance_data_refs_alignment
1351 This pass will use loop versioning and loop peeling in order to enhance
1352 the alignment of data references in the loop.
1354 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1355 original loop is to be vectorized. Any other loops that are created by
1356 the transformations performed in this pass - are not supposed to be
1357 vectorized. This restriction will be relaxed.
1359 This pass will require a cost model to guide it whether to apply peeling
1360 or versioning or a combination of the two. For example, the scheme that
1361 intel uses when given a loop with several memory accesses, is as follows:
1362 choose one memory access ('p') which alignment you want to force by doing
1363 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1364 other accesses are not necessarily aligned, or (2) use loop versioning to
1365 generate one loop in which all accesses are aligned, and another loop in
1366 which only 'p' is necessarily aligned.
1368 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1369 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1370 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1372 Devising a cost model is the most critical aspect of this work. It will
1373 guide us on which access to peel for, whether to use loop versioning, how
1374 many versions to create, etc. The cost model will probably consist of
1375 generic considerations as well as target specific considerations (on
1376 powerpc for example, misaligned stores are more painful than misaligned
1379 Here are the general steps involved in alignment enhancements:
1381 -- original loop, before alignment analysis:
1382 for (i=0; i<N; i++){
1383 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1384 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1387 -- After vect_compute_data_refs_alignment:
1388 for (i=0; i<N; i++){
1389 x = q[i]; # DR_MISALIGNMENT(q) = 3
1390 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1393 -- Possibility 1: we do loop versioning:
1395 for (i=0; i<N; i++){ # loop 1A
1396 x = q[i]; # DR_MISALIGNMENT(q) = 3
1397 p[i] = y; # DR_MISALIGNMENT(p) = 0
1401 for (i=0; i<N; i++){ # loop 1B
1402 x = q[i]; # DR_MISALIGNMENT(q) = 3
1403 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1407 -- Possibility 2: we do loop peeling:
1408 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1412 for (i = 3; i < N; i++){ # loop 2A
1413 x = q[i]; # DR_MISALIGNMENT(q) = 0
1414 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1417 -- Possibility 3: combination of loop peeling and versioning:
1418 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1423 for (i = 3; i<N; i++){ # loop 3A
1424 x = q[i]; # DR_MISALIGNMENT(q) = 0
1425 p[i] = y; # DR_MISALIGNMENT(p) = 0
1429 for (i = 3; i<N; i++){ # loop 3B
1430 x = q[i]; # DR_MISALIGNMENT(q) = 0
1431 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1435 These loops are later passed to loop_transform to be vectorized. The
1436 vectorizer will use the alignment information to guide the transformation
1437 (whether to generate regular loads/stores, or with special handling for
1441 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1443 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1444 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1445 enum dr_alignment_support supportable_dr_alignment;
1446 struct data_reference *dr0 = NULL, *first_store = NULL;
1447 struct data_reference *dr;
1449 bool do_peeling = false;
1450 bool do_versioning = false;
1453 stmt_vec_info stmt_info;
1454 int vect_versioning_for_alias_required;
1455 unsigned int npeel = 0;
1456 bool all_misalignments_unknown = true;
1457 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1458 unsigned possible_npeel_number = 1;
1460 unsigned int nelements, mis, same_align_drs_max = 0;
1462 if (vect_print_dump_info (REPORT_DETAILS))
1463 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1465 /* While cost model enhancements are expected in the future, the high level
1466 view of the code at this time is as follows:
1468 A) If there is a misaligned access then see if peeling to align
1469 this access can make all data references satisfy
1470 vect_supportable_dr_alignment. If so, update data structures
1471 as needed and return true.
1473 B) If peeling wasn't possible and there is a data reference with an
1474 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1475 then see if loop versioning checks can be used to make all data
1476 references satisfy vect_supportable_dr_alignment. If so, update
1477 data structures as needed and return true.
1479 C) If neither peeling nor versioning were successful then return false if
1480 any data reference does not satisfy vect_supportable_dr_alignment.
1482 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1484 Note, Possibility 3 above (which is peeling and versioning together) is not
1485 being done at this time. */
1487 /* (1) Peeling to force alignment. */
1489 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1491 + How many accesses will become aligned due to the peeling
1492 - How many accesses will become unaligned due to the peeling,
1493 and the cost of misaligned accesses.
1494 - The cost of peeling (the extra runtime checks, the increase
1497 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1499 stmt = DR_STMT (dr);
1500 stmt_info = vinfo_for_stmt (stmt);
1502 if (!STMT_VINFO_RELEVANT (stmt_info))
1505 /* For interleaving, only the alignment of the first access
1507 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1508 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1511 /* For invariant accesses there is nothing to enhance. */
1512 if (integer_zerop (DR_STEP (dr)))
1515 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1516 do_peeling = vector_alignment_reachable_p (dr);
1519 if (known_alignment_for_access_p (dr))
1521 unsigned int npeel_tmp;
1522 bool negative = tree_int_cst_compare (DR_STEP (dr),
1523 size_zero_node) < 0;
1525 /* Save info about DR in the hash table. */
1526 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1527 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1528 htab_create (1, vect_peeling_hash,
1529 vect_peeling_hash_eq, free);
1531 vectype = STMT_VINFO_VECTYPE (stmt_info);
1532 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1533 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1534 TREE_TYPE (DR_REF (dr))));
1535 npeel_tmp = (negative
1536 ? (mis - nelements) : (nelements - mis))
1539 /* For multiple types, it is possible that the bigger type access
1540 will have more than one peeling option. E.g., a loop with two
1541 types: one of size (vector size / 4), and the other one of
1542 size (vector size / 8). Vectorization factor will 8. If both
1543 access are misaligned by 3, the first one needs one scalar
1544 iteration to be aligned, and the second one needs 5. But the
1545 the first one will be aligned also by peeling 5 scalar
1546 iterations, and in that case both accesses will be aligned.
1547 Hence, except for the immediate peeling amount, we also want
1548 to try to add full vector size, while we don't exceed
1549 vectorization factor.
1550 We do this automtically for cost model, since we calculate cost
1551 for every peeling option. */
1552 if (!flag_vect_cost_model)
1553 possible_npeel_number = vf /nelements;
1555 /* Handle the aligned case. We may decide to align some other
1556 access, making DR unaligned. */
1557 if (DR_MISALIGNMENT (dr) == 0)
1560 if (!flag_vect_cost_model)
1561 possible_npeel_number++;
1564 for (j = 0; j < possible_npeel_number; j++)
1566 gcc_assert (npeel_tmp <= vf);
1567 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1568 npeel_tmp += nelements;
1571 all_misalignments_unknown = false;
1572 /* Data-ref that was chosen for the case that all the
1573 misalignments are unknown is not relevant anymore, since we
1574 have a data-ref with known alignment. */
1579 /* If we don't know all the misalignment values, we prefer
1580 peeling for data-ref that has maximum number of data-refs
1581 with the same alignment, unless the target prefers to align
1582 stores over load. */
1583 if (all_misalignments_unknown)
1585 if (same_align_drs_max < VEC_length (dr_p,
1586 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1589 same_align_drs_max = VEC_length (dr_p,
1590 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1594 if (!first_store && DR_IS_WRITE (dr))
1598 /* If there are both known and unknown misaligned accesses in the
1599 loop, we choose peeling amount according to the known
1603 if (!supportable_dr_alignment)
1606 if (!first_store && DR_IS_WRITE (dr))
1613 if (!aligned_access_p (dr))
1615 if (vect_print_dump_info (REPORT_DETAILS))
1616 fprintf (vect_dump, "vector alignment may not be reachable");
1623 vect_versioning_for_alias_required
1624 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1626 /* Temporarily, if versioning for alias is required, we disable peeling
1627 until we support peeling and versioning. Often peeling for alignment
1628 will require peeling for loop-bound, which in turn requires that we
1629 know how to adjust the loop ivs after the loop. */
1630 if (vect_versioning_for_alias_required
1631 || !vect_can_advance_ivs_p (loop_vinfo)
1632 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1635 if (do_peeling && all_misalignments_unknown
1636 && vect_supportable_dr_alignment (dr0, false))
1639 /* Check if the target requires to prefer stores over loads, i.e., if
1640 misaligned stores are more expensive than misaligned loads (taking
1641 drs with same alignment into account). */
1642 if (first_store && DR_IS_READ (dr0))
1644 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1645 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1646 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1647 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1649 vect_get_data_access_cost (dr0, &load_inside_cost,
1650 &load_outside_cost);
1651 vect_get_data_access_cost (first_store, &store_inside_cost,
1652 &store_outside_cost);
1654 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1655 aligning the load DR0). */
1656 load_inside_penalty = store_inside_cost;
1657 load_outside_penalty = store_outside_cost;
1658 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1659 (vinfo_for_stmt (DR_STMT (first_store))),
1662 if (DR_IS_READ (dr))
1664 load_inside_penalty += load_inside_cost;
1665 load_outside_penalty += load_outside_cost;
1669 load_inside_penalty += store_inside_cost;
1670 load_outside_penalty += store_outside_cost;
1673 /* Calculate the penalty for leaving DR0 unaligned (by
1674 aligning the FIRST_STORE). */
1675 store_inside_penalty = load_inside_cost;
1676 store_outside_penalty = load_outside_cost;
1677 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1678 (vinfo_for_stmt (DR_STMT (dr0))),
1681 if (DR_IS_READ (dr))
1683 store_inside_penalty += load_inside_cost;
1684 store_outside_penalty += load_outside_cost;
1688 store_inside_penalty += store_inside_cost;
1689 store_outside_penalty += store_outside_cost;
1692 if (load_inside_penalty > store_inside_penalty
1693 || (load_inside_penalty == store_inside_penalty
1694 && load_outside_penalty > store_outside_penalty))
1698 /* In case there are only loads with different unknown misalignments, use
1699 peeling only if it may help to align other accesses in the loop. */
1700 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1701 (vinfo_for_stmt (DR_STMT (dr0))))
1702 && vect_supportable_dr_alignment (dr0, false)
1703 != dr_unaligned_supported)
1707 if (do_peeling && !dr0)
1709 /* Peeling is possible, but there is no data access that is not supported
1710 unless aligned. So we try to choose the best possible peeling. */
1712 /* We should get here only if there are drs with known misalignment. */
1713 gcc_assert (!all_misalignments_unknown);
1715 /* Choose the best peeling from the hash table. */
1716 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
1723 stmt = DR_STMT (dr0);
1724 stmt_info = vinfo_for_stmt (stmt);
1725 vectype = STMT_VINFO_VECTYPE (stmt_info);
1726 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1728 if (known_alignment_for_access_p (dr0))
1730 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1731 size_zero_node) < 0;
1734 /* Since it's known at compile time, compute the number of
1735 iterations in the peeled loop (the peeling factor) for use in
1736 updating DR_MISALIGNMENT values. The peeling factor is the
1737 vectorization factor minus the misalignment as an element
1739 mis = DR_MISALIGNMENT (dr0);
1740 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1741 npeel = ((negative ? mis - nelements : nelements - mis)
1745 /* For interleaved data access every iteration accesses all the
1746 members of the group, therefore we divide the number of iterations
1747 by the group size. */
1748 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1749 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1750 npeel /= GROUP_SIZE (stmt_info);
1752 if (vect_print_dump_info (REPORT_DETAILS))
1753 fprintf (vect_dump, "Try peeling by %d", npeel);
1756 /* Ensure that all data refs can be vectorized after the peel. */
1757 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1759 int save_misalignment;
1764 stmt = DR_STMT (dr);
1765 stmt_info = vinfo_for_stmt (stmt);
1766 /* For interleaving, only the alignment of the first access
1768 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1769 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
1772 save_misalignment = DR_MISALIGNMENT (dr);
1773 vect_update_misalignment_for_peel (dr, dr0, npeel);
1774 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1775 SET_DR_MISALIGNMENT (dr, save_misalignment);
1777 if (!supportable_dr_alignment)
1784 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1786 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1795 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1796 If the misalignment of DR_i is identical to that of dr0 then set
1797 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1798 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1799 by the peeling factor times the element size of DR_i (MOD the
1800 vectorization factor times the size). Otherwise, the
1801 misalignment of DR_i must be set to unknown. */
1802 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1804 vect_update_misalignment_for_peel (dr, dr0, npeel);
1806 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1808 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1810 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1811 SET_DR_MISALIGNMENT (dr0, 0);
1812 if (vect_print_dump_info (REPORT_ALIGNMENT))
1813 fprintf (vect_dump, "Alignment of access forced using peeling.");
1815 if (vect_print_dump_info (REPORT_DETAILS))
1816 fprintf (vect_dump, "Peeling for alignment will be applied.");
1818 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1825 /* (2) Versioning to force alignment. */
1827 /* Try versioning if:
1828 1) flag_tree_vect_loop_version is TRUE
1829 2) optimize loop for speed
1830 3) there is at least one unsupported misaligned data ref with an unknown
1832 4) all misaligned data refs with a known misalignment are supported, and
1833 5) the number of runtime alignment checks is within reason. */
1836 flag_tree_vect_loop_version
1837 && optimize_loop_nest_for_speed_p (loop)
1838 && (!loop->inner); /* FORNOW */
1842 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
1844 stmt = DR_STMT (dr);
1845 stmt_info = vinfo_for_stmt (stmt);
1847 /* For interleaving, only the alignment of the first access
1849 if (aligned_access_p (dr)
1850 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1851 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
1854 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
1856 if (!supportable_dr_alignment)
1862 if (known_alignment_for_access_p (dr)
1863 || VEC_length (gimple,
1864 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1865 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1867 do_versioning = false;
1871 stmt = DR_STMT (dr);
1872 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1873 gcc_assert (vectype);
1875 /* The rightmost bits of an aligned address must be zeros.
1876 Construct the mask needed for this test. For example,
1877 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1878 mask must be 15 = 0xf. */
1879 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1881 /* FORNOW: use the same mask to test all potentially unaligned
1882 references in the loop. The vectorizer currently supports
1883 a single vector size, see the reference to
1884 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1885 vectorization factor is computed. */
1886 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1887 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1888 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1889 VEC_safe_push (gimple, heap,
1890 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1895 /* Versioning requires at least one misaligned data reference. */
1896 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1897 do_versioning = false;
1898 else if (!do_versioning)
1899 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1904 VEC(gimple,heap) *may_misalign_stmts
1905 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1908 /* It can now be assumed that the data references in the statements
1909 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1910 of the loop being vectorized. */
1911 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
1913 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1914 dr = STMT_VINFO_DATA_REF (stmt_info);
1915 SET_DR_MISALIGNMENT (dr, 0);
1916 if (vect_print_dump_info (REPORT_ALIGNMENT))
1917 fprintf (vect_dump, "Alignment of access forced using versioning.");
1920 if (vect_print_dump_info (REPORT_DETAILS))
1921 fprintf (vect_dump, "Versioning for alignment will be applied.");
1923 /* Peeling and versioning can't be done together at this time. */
1924 gcc_assert (! (do_peeling && do_versioning));
1926 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1931 /* This point is reached if neither peeling nor versioning is being done. */
1932 gcc_assert (! (do_peeling || do_versioning));
1934 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1939 /* Function vect_find_same_alignment_drs.
1941 Update group and alignment relations according to the chosen
1942 vectorization factor. */
1945 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1946 loop_vec_info loop_vinfo)
1949 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1950 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1951 struct data_reference *dra = DDR_A (ddr);
1952 struct data_reference *drb = DDR_B (ddr);
1953 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1954 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1955 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1956 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1957 lambda_vector dist_v;
1958 unsigned int loop_depth;
1960 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1966 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1969 /* Loop-based vectorization and known data dependence. */
1970 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1973 /* Data-dependence analysis reports a distance vector of zero
1974 for data-references that overlap only in the first iteration
1975 but have different sign step (see PR45764).
1976 So as a sanity check require equal DR_STEP. */
1977 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1980 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1981 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1983 int dist = dist_v[loop_depth];
1985 if (vect_print_dump_info (REPORT_DR_DETAILS))
1986 fprintf (vect_dump, "dependence distance = %d.", dist);
1988 /* Same loop iteration. */
1990 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1992 /* Two references with distance zero have the same alignment. */
1993 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1994 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1995 if (vect_print_dump_info (REPORT_ALIGNMENT))
1996 fprintf (vect_dump, "accesses have the same alignment.");
1997 if (vect_print_dump_info (REPORT_DR_DETAILS))
1999 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
2000 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
2001 fprintf (vect_dump, " and ");
2002 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2009 /* Function vect_analyze_data_refs_alignment
2011 Analyze the alignment of the data-references in the loop.
2012 Return FALSE if a data reference is found that cannot be vectorized. */
2015 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
2016 bb_vec_info bb_vinfo)
2018 if (vect_print_dump_info (REPORT_DETAILS))
2019 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2021 /* Mark groups of data references with same alignment using
2022 data dependence information. */
2025 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2026 struct data_dependence_relation *ddr;
2029 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
2030 vect_find_same_alignment_drs (ddr, loop_vinfo);
2033 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
2035 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2037 "not vectorized: can't calculate alignment for data ref.");
2045 /* Analyze groups of strided accesses: check that DR belongs to a group of
2046 strided accesses of legal size, step, etc. Detect gaps, single element
2047 interleaving, and other special cases. Set strided access info.
2048 Collect groups of strided stores for further use in SLP analysis. */
2051 vect_analyze_group_access (struct data_reference *dr)
2053 tree step = DR_STEP (dr);
2054 tree scalar_type = TREE_TYPE (DR_REF (dr));
2055 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2056 gimple stmt = DR_STMT (dr);
2057 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2058 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2059 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2060 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2061 HOST_WIDE_INT stride, last_accessed_element = 1;
2062 bool slp_impossible = false;
2064 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2065 interleaving group (including gaps). */
2066 stride = dr_step / type_size;
2068 /* Not consecutive access is possible only if it is a part of interleaving. */
2069 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
2071 /* Check if it this DR is a part of interleaving, and is a single
2072 element of the group that is accessed in the loop. */
2074 /* Gaps are supported only for loads. STEP must be a multiple of the type
2075 size. The size of the group must be a power of 2. */
2077 && (dr_step % type_size) == 0
2079 && exact_log2 (stride) != -1)
2081 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
2082 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2083 if (vect_print_dump_info (REPORT_DR_DETAILS))
2085 fprintf (vect_dump, "Detected single element interleaving ");
2086 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2087 fprintf (vect_dump, " step ");
2088 print_generic_expr (vect_dump, step, TDF_SLIM);
2093 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2095 if (vect_print_dump_info (REPORT_DETAILS))
2096 fprintf (vect_dump, "Data access with gaps requires scalar "
2103 if (vect_print_dump_info (REPORT_DETAILS))
2105 fprintf (vect_dump, "not consecutive access ");
2106 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2111 /* Mark the statement as unvectorizable. */
2112 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2119 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
2121 /* First stmt in the interleaving chain. Check the chain. */
2122 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
2123 struct data_reference *data_ref = dr;
2124 unsigned int count = 1;
2126 tree prev_init = DR_INIT (data_ref);
2128 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2132 /* Skip same data-refs. In case that two or more stmts share
2133 data-ref (supported only for loads), we vectorize only the first
2134 stmt, and the rest get their vectorized loads from the first
2136 if (!tree_int_cst_compare (DR_INIT (data_ref),
2137 DR_INIT (STMT_VINFO_DATA_REF (
2138 vinfo_for_stmt (next)))))
2140 if (DR_IS_WRITE (data_ref))
2142 if (vect_print_dump_info (REPORT_DETAILS))
2143 fprintf (vect_dump, "Two store stmts share the same dr.");
2147 /* Check that there is no load-store dependencies for this loads
2148 to prevent a case of load-store-load to the same location. */
2149 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2150 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2152 if (vect_print_dump_info (REPORT_DETAILS))
2154 "READ_WRITE dependence in interleaving.");
2158 /* For load use the same data-ref load. */
2159 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2162 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2168 /* Check that all the accesses have the same STEP. */
2169 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2170 if (tree_int_cst_compare (step, next_step))
2172 if (vect_print_dump_info (REPORT_DETAILS))
2173 fprintf (vect_dump, "not consecutive access in interleaving");
2177 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2178 /* Check that the distance between two accesses is equal to the type
2179 size. Otherwise, we have gaps. */
2180 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2181 - TREE_INT_CST_LOW (prev_init)) / type_size;
2184 /* FORNOW: SLP of accesses with gaps is not supported. */
2185 slp_impossible = true;
2186 if (DR_IS_WRITE (data_ref))
2188 if (vect_print_dump_info (REPORT_DETAILS))
2189 fprintf (vect_dump, "interleaved store with gaps");
2196 last_accessed_element += diff;
2198 /* Store the gap from the previous member of the group. If there is no
2199 gap in the access, GROUP_GAP is always 1. */
2200 GROUP_GAP (vinfo_for_stmt (next)) = diff;
2202 prev_init = DR_INIT (data_ref);
2203 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
2204 /* Count the number of data-refs in the chain. */
2208 /* COUNT is the number of accesses found, we multiply it by the size of
2209 the type to get COUNT_IN_BYTES. */
2210 count_in_bytes = type_size * count;
2212 /* Check that the size of the interleaving (including gaps) is not
2213 greater than STEP. */
2214 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2216 if (vect_print_dump_info (REPORT_DETAILS))
2218 fprintf (vect_dump, "interleaving size is greater than step for ");
2219 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2224 /* Check that the size of the interleaving is equal to STEP for stores,
2225 i.e., that there are no gaps. */
2226 if (dr_step && dr_step != count_in_bytes)
2228 if (DR_IS_READ (dr))
2230 slp_impossible = true;
2231 /* There is a gap after the last load in the group. This gap is a
2232 difference between the stride and the number of elements. When
2233 there is no gap, this difference should be 0. */
2234 GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
2238 if (vect_print_dump_info (REPORT_DETAILS))
2239 fprintf (vect_dump, "interleaved store with gaps");
2244 /* Check that STEP is a multiple of type size. */
2245 if (dr_step && (dr_step % type_size) != 0)
2247 if (vect_print_dump_info (REPORT_DETAILS))
2249 fprintf (vect_dump, "step is not a multiple of type size: step ");
2250 print_generic_expr (vect_dump, step, TDF_SLIM);
2251 fprintf (vect_dump, " size ");
2252 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2261 GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2262 if (vect_print_dump_info (REPORT_DETAILS))
2263 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2265 /* SLP: create an SLP data structure for every interleaving group of
2266 stores for further analysis in vect_analyse_slp. */
2267 if (DR_IS_WRITE (dr) && !slp_impossible)
2270 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2273 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2277 /* There is a gap in the end of the group. */
2278 if (stride - last_accessed_element > 0 && loop_vinfo)
2280 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
2281 if (vect_print_dump_info (REPORT_DETAILS))
2282 fprintf (vect_dump, "Data access with gaps requires scalar "
2291 /* Analyze the access pattern of the data-reference DR.
2292 In case of non-consecutive accesses call vect_analyze_group_access() to
2293 analyze groups of strided accesses. */
2296 vect_analyze_data_ref_access (struct data_reference *dr)
2298 tree step = DR_STEP (dr);
2299 tree scalar_type = TREE_TYPE (DR_REF (dr));
2300 gimple stmt = DR_STMT (dr);
2301 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2302 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2303 struct loop *loop = NULL;
2304 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2307 loop = LOOP_VINFO_LOOP (loop_vinfo);
2309 if (loop_vinfo && !step)
2311 if (vect_print_dump_info (REPORT_DETAILS))
2312 fprintf (vect_dump, "bad data-ref access in loop");
2316 /* Allow invariant loads in loops. */
2317 if (loop_vinfo && dr_step == 0)
2319 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2320 return DR_IS_READ (dr);
2323 if (loop && nested_in_vect_loop_p (loop, stmt))
2325 /* Interleaved accesses are not yet supported within outer-loop
2326 vectorization for references in the inner-loop. */
2327 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2329 /* For the rest of the analysis we use the outer-loop step. */
2330 step = STMT_VINFO_DR_STEP (stmt_info);
2331 dr_step = TREE_INT_CST_LOW (step);
2335 if (vect_print_dump_info (REPORT_ALIGNMENT))
2336 fprintf (vect_dump, "zero step in outer loop.");
2337 if (DR_IS_READ (dr))
2345 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2347 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2349 /* Mark that it is not interleaving. */
2350 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2354 if (loop && nested_in_vect_loop_p (loop, stmt))
2356 if (vect_print_dump_info (REPORT_ALIGNMENT))
2357 fprintf (vect_dump, "strided access in outer loop.");
2361 /* Not consecutive access - check if it's a part of interleaving group. */
2362 return vect_analyze_group_access (dr);
2366 /* Function vect_analyze_data_ref_accesses.
2368 Analyze the access pattern of all the data references in the loop.
2370 FORNOW: the only access pattern that is considered vectorizable is a
2371 simple step 1 (consecutive) access.
2373 FORNOW: handle only arrays and pointer accesses. */
2376 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2379 VEC (data_reference_p, heap) *datarefs;
2380 struct data_reference *dr;
2382 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2386 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2388 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2390 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2391 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2392 && !vect_analyze_data_ref_access (dr))
2394 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2395 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2399 /* Mark the statement as not vectorizable. */
2400 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2410 /* Function vect_prune_runtime_alias_test_list.
2412 Prune a list of ddrs to be tested at run-time by versioning for alias.
2413 Return FALSE if resulting list of ddrs is longer then allowed by
2414 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2417 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2419 VEC (ddr_p, heap) * ddrs =
2420 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2423 if (vect_print_dump_info (REPORT_DETAILS))
2424 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2426 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2431 ddr_i = VEC_index (ddr_p, ddrs, i);
2434 for (j = 0; j < i; j++)
2436 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2438 if (vect_vfa_range_equal (ddr_i, ddr_j))
2440 if (vect_print_dump_info (REPORT_DR_DETAILS))
2442 fprintf (vect_dump, "found equal ranges ");
2443 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2444 fprintf (vect_dump, ", ");
2445 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2446 fprintf (vect_dump, " and ");
2447 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2448 fprintf (vect_dump, ", ");
2449 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2458 VEC_ordered_remove (ddr_p, ddrs, i);
2464 if (VEC_length (ddr_p, ddrs) >
2465 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2467 if (vect_print_dump_info (REPORT_DR_DETAILS))
2470 "disable versioning for alias - max number of generated "
2471 "checks exceeded.");
2474 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2483 /* Function vect_analyze_data_refs.
2485 Find all the data references in the loop or basic block.
2487 The general structure of the analysis of data refs in the vectorizer is as
2489 1- vect_analyze_data_refs(loop/bb): call
2490 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2491 in the loop/bb and their dependences.
2492 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2493 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2494 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2499 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2500 bb_vec_info bb_vinfo,
2503 struct loop *loop = NULL;
2504 basic_block bb = NULL;
2506 VEC (data_reference_p, heap) *datarefs;
2507 struct data_reference *dr;
2511 if (vect_print_dump_info (REPORT_DETAILS))
2512 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2516 loop = LOOP_VINFO_LOOP (loop_vinfo);
2517 res = compute_data_dependences_for_loop
2519 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2520 &LOOP_VINFO_DATAREFS (loop_vinfo),
2521 &LOOP_VINFO_DDRS (loop_vinfo));
2525 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2526 fprintf (vect_dump, "not vectorized: loop contains function calls"
2527 " or data references that cannot be analyzed");
2531 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2535 bb = BB_VINFO_BB (bb_vinfo);
2536 res = compute_data_dependences_for_bb (bb, true,
2537 &BB_VINFO_DATAREFS (bb_vinfo),
2538 &BB_VINFO_DDRS (bb_vinfo));
2541 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2542 fprintf (vect_dump, "not vectorized: basic block contains function"
2543 " calls or data references that cannot be analyzed");
2547 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2550 /* Go through the data-refs, check that the analysis succeeded. Update
2551 pointer from stmt_vec_info struct to DR and vectype. */
2553 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2556 stmt_vec_info stmt_info;
2557 tree base, offset, init;
2560 if (!dr || !DR_REF (dr))
2562 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2563 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2567 stmt = DR_STMT (dr);
2568 stmt_info = vinfo_for_stmt (stmt);
2570 /* Check that analysis of the data-ref succeeded. */
2571 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2574 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2576 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2577 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2582 /* Mark the statement as not vectorizable. */
2583 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2590 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2592 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2593 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2597 /* Mark the statement as not vectorizable. */
2598 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2605 if (TREE_THIS_VOLATILE (DR_REF (dr)))
2607 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2609 fprintf (vect_dump, "not vectorized: volatile type ");
2610 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2615 base = unshare_expr (DR_BASE_ADDRESS (dr));
2616 offset = unshare_expr (DR_OFFSET (dr));
2617 init = unshare_expr (DR_INIT (dr));
2619 if (stmt_can_throw_internal (stmt))
2621 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2623 fprintf (vect_dump, "not vectorized: statement can throw an "
2625 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2630 /* Update DR field in stmt_vec_info struct. */
2632 /* If the dataref is in an inner-loop of the loop that is considered for
2633 for vectorization, we also want to analyze the access relative to
2634 the outer-loop (DR contains information only relative to the
2635 inner-most enclosing loop). We do that by building a reference to the
2636 first location accessed by the inner-loop, and analyze it relative to
2638 if (loop && nested_in_vect_loop_p (loop, stmt))
2640 tree outer_step, outer_base, outer_init;
2641 HOST_WIDE_INT pbitsize, pbitpos;
2643 enum machine_mode pmode;
2644 int punsignedp, pvolatilep;
2645 affine_iv base_iv, offset_iv;
2648 /* Build a reference to the first location accessed by the
2649 inner-loop: *(BASE+INIT). (The first location is actually
2650 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2651 tree inner_base = build_fold_indirect_ref
2652 (fold_build_pointer_plus (base, init));
2654 if (vect_print_dump_info (REPORT_DETAILS))
2656 fprintf (vect_dump, "analyze in outer-loop: ");
2657 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2660 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2661 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2662 gcc_assert (outer_base != NULL_TREE);
2664 if (pbitpos % BITS_PER_UNIT != 0)
2666 if (vect_print_dump_info (REPORT_DETAILS))
2667 fprintf (vect_dump, "failed: bit offset alignment.\n");
2671 outer_base = build_fold_addr_expr (outer_base);
2672 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2675 if (vect_print_dump_info (REPORT_DETAILS))
2676 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2683 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2691 offset_iv.base = ssize_int (0);
2692 offset_iv.step = ssize_int (0);
2694 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2697 if (vect_print_dump_info (REPORT_DETAILS))
2698 fprintf (vect_dump, "evolution of offset is not affine.\n");
2702 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2703 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2704 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2705 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2706 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2708 outer_step = size_binop (PLUS_EXPR,
2709 fold_convert (ssizetype, base_iv.step),
2710 fold_convert (ssizetype, offset_iv.step));
2712 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2713 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2714 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2715 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2716 STMT_VINFO_DR_OFFSET (stmt_info) =
2717 fold_convert (ssizetype, offset_iv.base);
2718 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2719 size_int (highest_pow2_factor (offset_iv.base));
2721 if (vect_print_dump_info (REPORT_DETAILS))
2723 fprintf (vect_dump, "\touter base_address: ");
2724 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2725 fprintf (vect_dump, "\n\touter offset from base address: ");
2726 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2727 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2728 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2729 fprintf (vect_dump, "\n\touter step: ");
2730 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2731 fprintf (vect_dump, "\n\touter aligned to: ");
2732 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2736 if (STMT_VINFO_DATA_REF (stmt_info))
2738 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2741 "not vectorized: more than one data ref in stmt: ");
2742 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2747 STMT_VINFO_DATA_REF (stmt_info) = dr;
2749 /* Set vectype for STMT. */
2750 scalar_type = TREE_TYPE (DR_REF (dr));
2751 STMT_VINFO_VECTYPE (stmt_info) =
2752 get_vectype_for_scalar_type (scalar_type);
2753 if (!STMT_VINFO_VECTYPE (stmt_info))
2755 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2758 "not vectorized: no vectype for stmt: ");
2759 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2760 fprintf (vect_dump, " scalar_type: ");
2761 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2766 /* Mark the statement as not vectorizable. */
2767 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2774 /* Adjust the minimal vectorization factor according to the
2776 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2785 /* Function vect_get_new_vect_var.
2787 Returns a name for a new variable. The current naming scheme appends the
2788 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2789 the name of vectorizer generated variables, and appends that to NAME if
2793 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2800 case vect_simple_var:
2803 case vect_scalar_var:
2806 case vect_pointer_var:
2815 char* tmp = concat (prefix, name, NULL);
2816 new_vect_var = create_tmp_var (type, tmp);
2820 new_vect_var = create_tmp_var (type, prefix);
2822 /* Mark vector typed variable as a gimple register variable. */
2823 if (TREE_CODE (type) == VECTOR_TYPE)
2824 DECL_GIMPLE_REG_P (new_vect_var) = true;
2826 return new_vect_var;
2830 /* Function vect_create_addr_base_for_vector_ref.
2832 Create an expression that computes the address of the first memory location
2833 that will be accessed for a data reference.
2836 STMT: The statement containing the data reference.
2837 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2838 OFFSET: Optional. If supplied, it is be added to the initial address.
2839 LOOP: Specify relative to which loop-nest should the address be computed.
2840 For example, when the dataref is in an inner-loop nested in an
2841 outer-loop that is now being vectorized, LOOP can be either the
2842 outer-loop, or the inner-loop. The first memory location accessed
2843 by the following dataref ('in' points to short):
2850 if LOOP=i_loop: &in (relative to i_loop)
2851 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2854 1. Return an SSA_NAME whose value is the address of the memory location of
2855 the first vector of the data reference.
2856 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2857 these statement(s) which define the returned SSA_NAME.
2859 FORNOW: We are only handling array accesses with step 1. */
2862 vect_create_addr_base_for_vector_ref (gimple stmt,
2863 gimple_seq *new_stmt_list,
2867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2868 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2869 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2871 tree data_ref_base_var;
2873 tree addr_base, addr_expr;
2875 gimple_seq seq = NULL;
2876 tree base_offset = unshare_expr (DR_OFFSET (dr));
2877 tree init = unshare_expr (DR_INIT (dr));
2879 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2880 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2883 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2885 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2887 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2889 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2890 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2891 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2895 base_name = build_fold_indirect_ref (data_ref_base);
2898 base_offset = ssize_int (0);
2899 init = ssize_int (0);
2900 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2903 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2904 add_referenced_var (data_ref_base_var);
2905 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2907 gimple_seq_add_seq (new_stmt_list, seq);
2909 /* Create base_offset */
2910 base_offset = size_binop (PLUS_EXPR,
2911 fold_convert (sizetype, base_offset),
2912 fold_convert (sizetype, init));
2913 dest = create_tmp_var (sizetype, "base_off");
2914 add_referenced_var (dest);
2915 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2916 gimple_seq_add_seq (new_stmt_list, seq);
2920 tree tmp = create_tmp_var (sizetype, "offset");
2922 add_referenced_var (tmp);
2923 offset = fold_build2 (MULT_EXPR, sizetype,
2924 fold_convert (sizetype, offset), step);
2925 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2926 base_offset, offset);
2927 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2928 gimple_seq_add_seq (new_stmt_list, seq);
2931 /* base + base_offset */
2933 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
2936 addr_base = build1 (ADDR_EXPR,
2937 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2938 unshare_expr (DR_REF (dr)));
2941 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2942 base = get_base_address (DR_REF (dr));
2944 && TREE_CODE (base) == MEM_REF)
2946 = build_qualified_type (vect_ptr_type,
2947 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2949 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2950 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2951 get_name (base_name));
2952 add_referenced_var (addr_expr);
2953 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2954 gimple_seq_add_seq (new_stmt_list, seq);
2956 if (DR_PTR_INFO (dr)
2957 && TREE_CODE (vec_stmt) == SSA_NAME)
2959 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2962 SSA_NAME_PTR_INFO (vec_stmt)->align = 1;
2963 SSA_NAME_PTR_INFO (vec_stmt)->misalign = 0;
2967 if (vect_print_dump_info (REPORT_DETAILS))
2969 fprintf (vect_dump, "created ");
2970 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2977 /* Function vect_create_data_ref_ptr.
2979 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
2980 location accessed in the loop by STMT, along with the def-use update
2981 chain to appropriately advance the pointer through the loop iterations.
2982 Also set aliasing information for the pointer. This pointer is used by
2983 the callers to this function to create a memory reference expression for
2984 vector load/store access.
2987 1. STMT: a stmt that references memory. Expected to be of the form
2988 GIMPLE_ASSIGN <name, data-ref> or
2989 GIMPLE_ASSIGN <data-ref, name>.
2990 2. AGGR_TYPE: the type of the reference, which should be either a vector
2992 3. AT_LOOP: the loop where the vector memref is to be created.
2993 4. OFFSET (optional): an offset to be added to the initial address accessed
2994 by the data-ref in STMT.
2995 5. BSI: location where the new stmts are to be placed if there is no loop
2996 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
2997 pointing to the initial address.
3000 1. Declare a new ptr to vector_type, and have it point to the base of the
3001 data reference (initial addressed accessed by the data reference).
3002 For example, for vector of type V8HI, the following code is generated:
3005 ap = (v8hi *)initial_address;
3007 if OFFSET is not supplied:
3008 initial_address = &a[init];
3009 if OFFSET is supplied:
3010 initial_address = &a[init + OFFSET];
3012 Return the initial_address in INITIAL_ADDRESS.
3014 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3015 update the pointer in each iteration of the loop.
3017 Return the increment stmt that updates the pointer in PTR_INCR.
3019 3. Set INV_P to true if the access pattern of the data reference in the
3020 vectorized loop is invariant. Set it to false otherwise.
3022 4. Return the pointer. */
3025 vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3026 tree offset, tree *initial_address,
3027 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3028 bool only_init, bool *inv_p)
3031 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3032 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3033 struct loop *loop = NULL;
3034 bool nested_in_vect_loop = false;
3035 struct loop *containing_loop = NULL;
3040 gimple_seq new_stmt_list = NULL;
3044 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3046 gimple_stmt_iterator incr_gsi;
3049 tree indx_before_incr, indx_after_incr;
3052 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
3055 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3056 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3060 loop = LOOP_VINFO_LOOP (loop_vinfo);
3061 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3062 containing_loop = (gimple_bb (stmt))->loop_father;
3063 pe = loop_preheader_edge (loop);
3067 gcc_assert (bb_vinfo);
3072 /* Check the step (evolution) of the load in LOOP, and record
3073 whether it's invariant. */
3074 if (nested_in_vect_loop)
3075 step = STMT_VINFO_DR_STEP (stmt_info);
3077 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3079 if (tree_int_cst_compare (step, size_zero_node) == 0)
3083 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3085 /* Create an expression for the first address accessed by this load
3087 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3089 if (vect_print_dump_info (REPORT_DETAILS))
3091 tree data_ref_base = base_name;
3092 fprintf (vect_dump, "create %s-pointer variable to type: ",
3093 tree_code_name[(int) TREE_CODE (aggr_type)]);
3094 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
3095 if (TREE_CODE (data_ref_base) == VAR_DECL
3096 || TREE_CODE (data_ref_base) == ARRAY_REF)
3097 fprintf (vect_dump, " vectorizing an array ref: ");
3098 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3099 fprintf (vect_dump, " vectorizing a record based array ref: ");
3100 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3101 fprintf (vect_dump, " vectorizing a pointer ref: ");
3102 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3105 /* (1) Create the new aggregate-pointer variable. */
3106 aggr_ptr_type = build_pointer_type (aggr_type);
3107 base = get_base_address (DR_REF (dr));
3109 && TREE_CODE (base) == MEM_REF)
3111 = build_qualified_type (aggr_ptr_type,
3112 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3113 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3114 get_name (base_name));
3116 /* Vector and array types inherit the alias set of their component
3117 type by default so we need to use a ref-all pointer if the data
3118 reference does not conflict with the created aggregated data
3119 reference because it is not addressable. */
3120 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3121 get_alias_set (DR_REF (dr))))
3124 = build_pointer_type_for_mode (aggr_type,
3125 TYPE_MODE (aggr_ptr_type), true);
3126 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3127 get_name (base_name));
3130 /* Likewise for any of the data references in the stmt group. */
3131 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
3133 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
3136 tree lhs = gimple_assign_lhs (orig_stmt);
3137 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
3138 get_alias_set (lhs)))
3141 = build_pointer_type_for_mode (aggr_type,
3142 TYPE_MODE (aggr_ptr_type), true);
3144 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
3145 get_name (base_name));
3149 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
3154 add_referenced_var (aggr_ptr);
3156 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3157 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3158 def-use update cycles for the pointer: one relative to the outer-loop
3159 (LOOP), which is what steps (3) and (4) below do. The other is relative
3160 to the inner-loop (which is the inner-most loop containing the dataref),
3161 and this is done be step (5) below.
3163 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3164 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3165 redundant. Steps (3),(4) create the following:
3168 LOOP: vp1 = phi(vp0,vp2)
3174 If there is an inner-loop nested in loop, then step (5) will also be
3175 applied, and an additional update in the inner-loop will be created:
3178 LOOP: vp1 = phi(vp0,vp2)
3180 inner: vp3 = phi(vp1,vp4)
3181 vp4 = vp3 + inner_step
3187 /* (2) Calculate the initial address of the aggregate-pointer, and set
3188 the aggregate-pointer to point to it before the loop. */
3190 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3192 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3198 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3199 gcc_assert (!new_bb);
3202 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
3205 *initial_address = new_temp;
3207 /* Create: p = (aggr_type *) initial_base */
3208 if (TREE_CODE (new_temp) != SSA_NAME
3209 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
3211 vec_stmt = gimple_build_assign (aggr_ptr,
3212 fold_convert (aggr_ptr_type, new_temp));
3213 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
3214 /* Copy the points-to information if it exists. */
3215 if (DR_PTR_INFO (dr))
3216 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3217 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
3220 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3221 gcc_assert (!new_bb);
3224 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
3227 aggr_ptr_init = new_temp;
3229 /* (3) Handle the updating of the aggregate-pointer inside the loop.
3230 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3231 inner-loop nested in LOOP (during outer-loop vectorization). */
3233 /* No update in loop is required. */
3234 if (only_init && (!loop_vinfo || at_loop == loop))
3235 aptr = aggr_ptr_init;
3238 /* The step of the aggregate pointer is the type size. */
3239 tree step = TYPE_SIZE_UNIT (aggr_type);
3240 /* One exception to the above is when the scalar step of the load in
3241 LOOP is zero. In this case the step here is also zero. */
3243 step = size_zero_node;
3245 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3247 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3249 create_iv (aggr_ptr_init,
3250 fold_convert (aggr_ptr_type, step),
3251 aggr_ptr, loop, &incr_gsi, insert_after,
3252 &indx_before_incr, &indx_after_incr);
3253 incr = gsi_stmt (incr_gsi);
3254 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3256 /* Copy the points-to information if it exists. */
3257 if (DR_PTR_INFO (dr))
3259 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3260 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3265 aptr = indx_before_incr;
3268 if (!nested_in_vect_loop || only_init)
3272 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
3273 nested in LOOP, if exists. */
3275 gcc_assert (nested_in_vect_loop);
3278 standard_iv_increment_position (containing_loop, &incr_gsi,
3280 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
3281 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3283 incr = gsi_stmt (incr_gsi);
3284 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3286 /* Copy the points-to information if it exists. */
3287 if (DR_PTR_INFO (dr))
3289 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3290 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3295 return indx_before_incr;
3302 /* Function bump_vector_ptr
3304 Increment a pointer (to a vector type) by vector-size. If requested,
3305 i.e. if PTR-INCR is given, then also connect the new increment stmt
3306 to the existing def-use update-chain of the pointer, by modifying
3307 the PTR_INCR as illustrated below:
3309 The pointer def-use update-chain before this function:
3310 DATAREF_PTR = phi (p_0, p_2)
3312 PTR_INCR: p_2 = DATAREF_PTR + step
3314 The pointer def-use update-chain after this function:
3315 DATAREF_PTR = phi (p_0, p_2)
3317 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3319 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3322 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3324 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3325 the loop. The increment amount across iterations is expected
3327 BSI - location where the new update stmt is to be placed.
3328 STMT - the original scalar memory-access stmt that is being vectorized.
3329 BUMP - optional. The offset by which to bump the pointer. If not given,
3330 the offset is assumed to be vector_size.
3332 Output: Return NEW_DATAREF_PTR as illustrated above.
3337 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3338 gimple stmt, tree bump)
3340 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3341 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3342 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3343 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3344 tree update = TYPE_SIZE_UNIT (vectype);
3347 use_operand_p use_p;
3348 tree new_dataref_ptr;
3353 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3354 dataref_ptr, update);
3355 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3356 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3357 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3359 /* Copy the points-to information if it exists. */
3360 if (DR_PTR_INFO (dr))
3362 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3363 SSA_NAME_PTR_INFO (new_dataref_ptr)->align = 1;
3364 SSA_NAME_PTR_INFO (new_dataref_ptr)->misalign = 0;
3368 return new_dataref_ptr;
3370 /* Update the vector-pointer's cross-iteration increment. */
3371 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3373 tree use = USE_FROM_PTR (use_p);
3375 if (use == dataref_ptr)
3376 SET_USE (use_p, new_dataref_ptr);
3378 gcc_assert (tree_int_cst_compare (use, update) == 0);
3381 return new_dataref_ptr;
3385 /* Function vect_create_destination_var.
3387 Create a new temporary of type VECTYPE. */
3390 vect_create_destination_var (tree scalar_dest, tree vectype)
3393 const char *new_name;
3395 enum vect_var_kind kind;
3397 kind = vectype ? vect_simple_var : vect_scalar_var;
3398 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3400 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3402 new_name = get_name (scalar_dest);
3405 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3406 add_referenced_var (vec_dest);
3411 /* Function vect_strided_store_supported.
3413 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3414 and FALSE otherwise. */
3417 vect_strided_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
3419 optab interleave_high_optab, interleave_low_optab;
3420 enum machine_mode mode;
3422 mode = TYPE_MODE (vectype);
3424 /* vect_permute_store_chain requires the group size to be a power of two. */
3425 if (exact_log2 (count) == -1)
3427 if (vect_print_dump_info (REPORT_DETAILS))
3428 fprintf (vect_dump, "the size of the group of strided accesses"
3429 " is not a power of 2");
3433 /* Check that the operation is supported. */
3434 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3435 vectype, optab_default);
3436 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3437 vectype, optab_default);
3438 if (!interleave_high_optab || !interleave_low_optab)
3440 if (vect_print_dump_info (REPORT_DETAILS))
3441 fprintf (vect_dump, "no optab for interleave.");
3445 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3446 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3448 if (vect_print_dump_info (REPORT_DETAILS))
3449 fprintf (vect_dump, "interleave op not supported by target.");
3457 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
3461 vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3463 return vect_lanes_optab_supported_p ("vec_store_lanes",
3464 vec_store_lanes_optab,
3469 /* Function vect_permute_store_chain.
3471 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3472 a power of 2, generate interleave_high/low stmts to reorder the data
3473 correctly for the stores. Return the final references for stores in
3476 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3477 The input is 4 vectors each containing 8 elements. We assign a number to
3478 each element, the input sequence is:
3480 1st vec: 0 1 2 3 4 5 6 7
3481 2nd vec: 8 9 10 11 12 13 14 15
3482 3rd vec: 16 17 18 19 20 21 22 23
3483 4th vec: 24 25 26 27 28 29 30 31
3485 The output sequence should be:
3487 1st vec: 0 8 16 24 1 9 17 25
3488 2nd vec: 2 10 18 26 3 11 19 27
3489 3rd vec: 4 12 20 28 5 13 21 30
3490 4th vec: 6 14 22 30 7 15 23 31
3492 i.e., we interleave the contents of the four vectors in their order.
3494 We use interleave_high/low instructions to create such output. The input of
3495 each interleave_high/low operation is two vectors:
3498 the even elements of the result vector are obtained left-to-right from the
3499 high/low elements of the first vector. The odd elements of the result are
3500 obtained left-to-right from the high/low elements of the second vector.
3501 The output of interleave_high will be: 0 4 1 5
3502 and of interleave_low: 2 6 3 7
3505 The permutation is done in log LENGTH stages. In each stage interleave_high
3506 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3507 where the first argument is taken from the first half of DR_CHAIN and the
3508 second argument from it's second half.
3511 I1: interleave_high (1st vec, 3rd vec)
3512 I2: interleave_low (1st vec, 3rd vec)
3513 I3: interleave_high (2nd vec, 4th vec)
3514 I4: interleave_low (2nd vec, 4th vec)
3516 The output for the first stage is:
3518 I1: 0 16 1 17 2 18 3 19
3519 I2: 4 20 5 21 6 22 7 23
3520 I3: 8 24 9 25 10 26 11 27
3521 I4: 12 28 13 29 14 30 15 31
3523 The output of the second stage, i.e. the final result is:
3525 I1: 0 8 16 24 1 9 17 25
3526 I2: 2 10 18 26 3 11 19 27
3527 I3: 4 12 20 28 5 13 21 30
3528 I4: 6 14 22 30 7 15 23 31. */
3531 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3532 unsigned int length,
3534 gimple_stmt_iterator *gsi,
3535 VEC(tree,heap) **result_chain)
3537 tree perm_dest, vect1, vect2, high, low;
3539 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3542 enum tree_code high_code, low_code;
3544 gcc_assert (vect_strided_store_supported (vectype, length));
3546 *result_chain = VEC_copy (tree, heap, dr_chain);
3548 for (i = 0; i < exact_log2 (length); i++)
3550 for (j = 0; j < length/2; j++)
3552 vect1 = VEC_index (tree, dr_chain, j);
3553 vect2 = VEC_index (tree, dr_chain, j+length/2);
3555 /* Create interleaving stmt:
3556 in the case of big endian:
3557 high = interleave_high (vect1, vect2)
3558 and in the case of little endian:
3559 high = interleave_low (vect1, vect2). */
3560 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3561 DECL_GIMPLE_REG_P (perm_dest) = 1;
3562 add_referenced_var (perm_dest);
3563 if (BYTES_BIG_ENDIAN)
3565 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3566 low_code = VEC_INTERLEAVE_LOW_EXPR;
3570 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3571 high_code = VEC_INTERLEAVE_LOW_EXPR;
3573 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3575 high = make_ssa_name (perm_dest, perm_stmt);
3576 gimple_assign_set_lhs (perm_stmt, high);
3577 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3578 VEC_replace (tree, *result_chain, 2*j, high);
3580 /* Create interleaving stmt:
3581 in the case of big endian:
3582 low = interleave_low (vect1, vect2)
3583 and in the case of little endian:
3584 low = interleave_high (vect1, vect2). */
3585 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3586 DECL_GIMPLE_REG_P (perm_dest) = 1;
3587 add_referenced_var (perm_dest);
3588 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3590 low = make_ssa_name (perm_dest, perm_stmt);
3591 gimple_assign_set_lhs (perm_stmt, low);
3592 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3593 VEC_replace (tree, *result_chain, 2*j+1, low);
3595 dr_chain = VEC_copy (tree, heap, *result_chain);
3599 /* Function vect_setup_realignment
3601 This function is called when vectorizing an unaligned load using
3602 the dr_explicit_realign[_optimized] scheme.
3603 This function generates the following code at the loop prolog:
3606 x msq_init = *(floor(p)); # prolog load
3607 realignment_token = call target_builtin;
3609 x msq = phi (msq_init, ---)
3611 The stmts marked with x are generated only for the case of
3612 dr_explicit_realign_optimized.
3614 The code above sets up a new (vector) pointer, pointing to the first
3615 location accessed by STMT, and a "floor-aligned" load using that pointer.
3616 It also generates code to compute the "realignment-token" (if the relevant
3617 target hook was defined), and creates a phi-node at the loop-header bb
3618 whose arguments are the result of the prolog-load (created by this
3619 function) and the result of a load that takes place in the loop (to be
3620 created by the caller to this function).
3622 For the case of dr_explicit_realign_optimized:
3623 The caller to this function uses the phi-result (msq) to create the
3624 realignment code inside the loop, and sets up the missing phi argument,
3627 msq = phi (msq_init, lsq)
3628 lsq = *(floor(p')); # load in loop
3629 result = realign_load (msq, lsq, realignment_token);
3631 For the case of dr_explicit_realign:
3633 msq = *(floor(p)); # load in loop
3635 lsq = *(floor(p')); # load in loop
3636 result = realign_load (msq, lsq, realignment_token);
3639 STMT - (scalar) load stmt to be vectorized. This load accesses
3640 a memory location that may be unaligned.
3641 BSI - place where new code is to be inserted.
3642 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3646 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3647 target hook, if defined.
3648 Return value - the result of the loop-header phi node. */
3651 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3652 tree *realignment_token,
3653 enum dr_alignment_support alignment_support_scheme,
3655 struct loop **at_loop)
3657 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3658 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3659 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3660 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3661 struct loop *loop = NULL;
3663 tree scalar_dest = gimple_assign_lhs (stmt);
3670 tree msq_init = NULL_TREE;
3673 tree msq = NULL_TREE;
3674 gimple_seq stmts = NULL;
3676 bool compute_in_loop = false;
3677 bool nested_in_vect_loop = false;
3678 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3679 struct loop *loop_for_initial_load = NULL;
3683 loop = LOOP_VINFO_LOOP (loop_vinfo);
3684 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3687 gcc_assert (alignment_support_scheme == dr_explicit_realign
3688 || alignment_support_scheme == dr_explicit_realign_optimized);
3690 /* We need to generate three things:
3691 1. the misalignment computation
3692 2. the extra vector load (for the optimized realignment scheme).
3693 3. the phi node for the two vectors from which the realignment is
3694 done (for the optimized realignment scheme). */
3696 /* 1. Determine where to generate the misalignment computation.
3698 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3699 calculation will be generated by this function, outside the loop (in the
3700 preheader). Otherwise, INIT_ADDR had already been computed for us by the
3701 caller, inside the loop.
3703 Background: If the misalignment remains fixed throughout the iterations of
3704 the loop, then both realignment schemes are applicable, and also the
3705 misalignment computation can be done outside LOOP. This is because we are
3706 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3707 are a multiple of VS (the Vector Size), and therefore the misalignment in
3708 different vectorized LOOP iterations is always the same.
3709 The problem arises only if the memory access is in an inner-loop nested
3710 inside LOOP, which is now being vectorized using outer-loop vectorization.
3711 This is the only case when the misalignment of the memory access may not
3712 remain fixed throughout the iterations of the inner-loop (as explained in
3713 detail in vect_supportable_dr_alignment). In this case, not only is the
3714 optimized realignment scheme not applicable, but also the misalignment
3715 computation (and generation of the realignment token that is passed to
3716 REALIGN_LOAD) have to be done inside the loop.
3718 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3719 or not, which in turn determines if the misalignment is computed inside
3720 the inner-loop, or outside LOOP. */
3722 if (init_addr != NULL_TREE || !loop_vinfo)
3724 compute_in_loop = true;
3725 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3729 /* 2. Determine where to generate the extra vector load.
3731 For the optimized realignment scheme, instead of generating two vector
3732 loads in each iteration, we generate a single extra vector load in the
3733 preheader of the loop, and in each iteration reuse the result of the
3734 vector load from the previous iteration. In case the memory access is in
3735 an inner-loop nested inside LOOP, which is now being vectorized using
3736 outer-loop vectorization, we need to determine whether this initial vector
3737 load should be generated at the preheader of the inner-loop, or can be
3738 generated at the preheader of LOOP. If the memory access has no evolution
3739 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3740 to be generated inside LOOP (in the preheader of the inner-loop). */
3742 if (nested_in_vect_loop)
3744 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3745 bool invariant_in_outerloop =
3746 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3747 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3750 loop_for_initial_load = loop;
3752 *at_loop = loop_for_initial_load;
3754 if (loop_for_initial_load)
3755 pe = loop_preheader_edge (loop_for_initial_load);
3757 /* 3. For the case of the optimized realignment, create the first vector
3758 load at the loop preheader. */
3760 if (alignment_support_scheme == dr_explicit_realign_optimized)
3762 /* Create msq_init = *(floor(p1)) in the loop preheader */
3764 gcc_assert (!compute_in_loop);
3765 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3766 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
3767 NULL_TREE, &init_addr, NULL, &inc,
3769 new_stmt = gimple_build_assign_with_ops
3770 (BIT_AND_EXPR, NULL_TREE, ptr,
3771 build_int_cst (TREE_TYPE (ptr),
3772 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
3773 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3774 gimple_assign_set_lhs (new_stmt, new_temp);
3775 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3776 gcc_assert (!new_bb);
3778 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3779 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3780 new_stmt = gimple_build_assign (vec_dest, data_ref);
3781 new_temp = make_ssa_name (vec_dest, new_stmt);
3782 gimple_assign_set_lhs (new_stmt, new_temp);
3783 mark_symbols_for_renaming (new_stmt);
3786 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3787 gcc_assert (!new_bb);
3790 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3792 msq_init = gimple_assign_lhs (new_stmt);
3795 /* 4. Create realignment token using a target builtin, if available.
3796 It is done either inside the containing loop, or before LOOP (as
3797 determined above). */
3799 if (targetm.vectorize.builtin_mask_for_load)
3803 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3806 /* Generate the INIT_ADDR computation outside LOOP. */
3807 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3811 pe = loop_preheader_edge (loop);
3812 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3813 gcc_assert (!new_bb);
3816 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3819 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3820 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3822 vect_create_destination_var (scalar_dest,
3823 gimple_call_return_type (new_stmt));
3824 new_temp = make_ssa_name (vec_dest, new_stmt);
3825 gimple_call_set_lhs (new_stmt, new_temp);
3827 if (compute_in_loop)
3828 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3831 /* Generate the misalignment computation outside LOOP. */
3832 pe = loop_preheader_edge (loop);
3833 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3834 gcc_assert (!new_bb);
3837 *realignment_token = gimple_call_lhs (new_stmt);
3839 /* The result of the CALL_EXPR to this builtin is determined from
3840 the value of the parameter and no global variables are touched
3841 which makes the builtin a "const" function. Requiring the
3842 builtin to have the "const" attribute makes it unnecessary
3843 to call mark_call_clobbered. */
3844 gcc_assert (TREE_READONLY (builtin_decl));
3847 if (alignment_support_scheme == dr_explicit_realign)
3850 gcc_assert (!compute_in_loop);
3851 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3854 /* 5. Create msq = phi <msq_init, lsq> in loop */
3856 pe = loop_preheader_edge (containing_loop);
3857 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3858 msq = make_ssa_name (vec_dest, NULL);
3859 phi_stmt = create_phi_node (msq, containing_loop->header);
3860 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3861 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3867 /* Function vect_strided_load_supported.
3869 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3870 and FALSE otherwise. */
3873 vect_strided_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
3875 optab perm_even_optab, perm_odd_optab;
3876 enum machine_mode mode;
3878 mode = TYPE_MODE (vectype);
3880 /* vect_permute_load_chain requires the group size to be a power of two. */
3881 if (exact_log2 (count) == -1)
3883 if (vect_print_dump_info (REPORT_DETAILS))
3884 fprintf (vect_dump, "the size of the group of strided accesses"
3885 " is not a power of 2");
3889 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3891 if (!perm_even_optab)
3893 if (vect_print_dump_info (REPORT_DETAILS))
3894 fprintf (vect_dump, "no optab for perm_even.");
3898 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3900 if (vect_print_dump_info (REPORT_DETAILS))
3901 fprintf (vect_dump, "perm_even op not supported by target.");
3905 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3907 if (!perm_odd_optab)
3909 if (vect_print_dump_info (REPORT_DETAILS))
3910 fprintf (vect_dump, "no optab for perm_odd.");
3914 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3916 if (vect_print_dump_info (REPORT_DETAILS))
3917 fprintf (vect_dump, "perm_odd op not supported by target.");
3923 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
3927 vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
3929 return vect_lanes_optab_supported_p ("vec_load_lanes",
3930 vec_load_lanes_optab,
3934 /* Function vect_permute_load_chain.
3936 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3937 a power of 2, generate extract_even/odd stmts to reorder the input data
3938 correctly. Return the final references for loads in RESULT_CHAIN.
3940 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3941 The input is 4 vectors each containing 8 elements. We assign a number to each
3942 element, the input sequence is:
3944 1st vec: 0 1 2 3 4 5 6 7
3945 2nd vec: 8 9 10 11 12 13 14 15
3946 3rd vec: 16 17 18 19 20 21 22 23
3947 4th vec: 24 25 26 27 28 29 30 31
3949 The output sequence should be:
3951 1st vec: 0 4 8 12 16 20 24 28
3952 2nd vec: 1 5 9 13 17 21 25 29
3953 3rd vec: 2 6 10 14 18 22 26 30
3954 4th vec: 3 7 11 15 19 23 27 31
3956 i.e., the first output vector should contain the first elements of each
3957 interleaving group, etc.
3959 We use extract_even/odd instructions to create such output. The input of
3960 each extract_even/odd operation is two vectors
3964 and the output is the vector of extracted even/odd elements. The output of
3965 extract_even will be: 0 2 4 6
3966 and of extract_odd: 1 3 5 7
3969 The permutation is done in log LENGTH stages. In each stage extract_even
3970 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3971 their order. In our example,
3973 E1: extract_even (1st vec, 2nd vec)
3974 E2: extract_odd (1st vec, 2nd vec)
3975 E3: extract_even (3rd vec, 4th vec)
3976 E4: extract_odd (3rd vec, 4th vec)
3978 The output for the first stage will be:
3980 E1: 0 2 4 6 8 10 12 14
3981 E2: 1 3 5 7 9 11 13 15
3982 E3: 16 18 20 22 24 26 28 30
3983 E4: 17 19 21 23 25 27 29 31
3985 In order to proceed and create the correct sequence for the next stage (or
3986 for the correct output, if the second stage is the last one, as in our
3987 example), we first put the output of extract_even operation and then the
3988 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3989 The input for the second stage is:
3991 1st vec (E1): 0 2 4 6 8 10 12 14
3992 2nd vec (E3): 16 18 20 22 24 26 28 30
3993 3rd vec (E2): 1 3 5 7 9 11 13 15
3994 4th vec (E4): 17 19 21 23 25 27 29 31
3996 The output of the second stage:
3998 E1: 0 4 8 12 16 20 24 28
3999 E2: 2 6 10 14 18 22 26 30
4000 E3: 1 5 9 13 17 21 25 29
4001 E4: 3 7 11 15 19 23 27 31
4003 And RESULT_CHAIN after reordering:
4005 1st vec (E1): 0 4 8 12 16 20 24 28
4006 2nd vec (E3): 1 5 9 13 17 21 25 29
4007 3rd vec (E2): 2 6 10 14 18 22 26 30
4008 4th vec (E4): 3 7 11 15 19 23 27 31. */
4011 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4012 unsigned int length,
4014 gimple_stmt_iterator *gsi,
4015 VEC(tree,heap) **result_chain)
4017 tree perm_dest, data_ref, first_vect, second_vect;
4019 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4023 gcc_assert (vect_strided_load_supported (vectype, length));
4025 *result_chain = VEC_copy (tree, heap, dr_chain);
4026 for (i = 0; i < exact_log2 (length); i++)
4028 for (j = 0; j < length; j +=2)
4030 first_vect = VEC_index (tree, dr_chain, j);
4031 second_vect = VEC_index (tree, dr_chain, j+1);
4033 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4034 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4035 DECL_GIMPLE_REG_P (perm_dest) = 1;
4036 add_referenced_var (perm_dest);
4038 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
4039 perm_dest, first_vect,
4042 data_ref = make_ssa_name (perm_dest, perm_stmt);
4043 gimple_assign_set_lhs (perm_stmt, data_ref);
4044 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4045 mark_symbols_for_renaming (perm_stmt);
4047 VEC_replace (tree, *result_chain, j/2, data_ref);
4049 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4050 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4051 DECL_GIMPLE_REG_P (perm_dest) = 1;
4052 add_referenced_var (perm_dest);
4054 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
4055 perm_dest, first_vect,
4057 data_ref = make_ssa_name (perm_dest, perm_stmt);
4058 gimple_assign_set_lhs (perm_stmt, data_ref);
4059 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4060 mark_symbols_for_renaming (perm_stmt);
4062 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4064 dr_chain = VEC_copy (tree, heap, *result_chain);
4069 /* Function vect_transform_strided_load.
4071 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4072 to perform their permutation and ascribe the result vectorized statements to
4073 the scalar statements.
4077 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
4078 gimple_stmt_iterator *gsi)
4080 VEC(tree,heap) *result_chain = NULL;
4082 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4083 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4084 vectors, that are ready for vector computation. */
4085 result_chain = VEC_alloc (tree, heap, size);
4086 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
4087 vect_record_strided_load_vectors (stmt, result_chain);
4088 VEC_free (tree, heap, result_chain);
4091 /* RESULT_CHAIN contains the output of a group of strided loads that were
4092 generated as part of the vectorization of STMT. Assign the statement
4093 for each vector to the associated scalar statement. */
4096 vect_record_strided_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
4098 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4099 gimple next_stmt, new_stmt;
4100 unsigned int i, gap_count;
4103 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4104 Since we scan the chain starting from it's first node, their order
4105 corresponds the order of data-refs in RESULT_CHAIN. */
4106 next_stmt = first_stmt;
4108 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
4113 /* Skip the gaps. Loads created for the gaps will be removed by dead
4114 code elimination pass later. No need to check for the first stmt in
4115 the group, since it always exists.
4116 GROUP_GAP is the number of steps in elements from the previous
4117 access (if there is no gap GROUP_GAP is 1). We skip loads that
4118 correspond to the gaps. */
4119 if (next_stmt != first_stmt
4120 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4128 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4129 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4130 copies, and we put the new vector statement in the first available
4132 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4133 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4136 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4139 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4141 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4144 prev_stmt = rel_stmt;
4146 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4149 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4154 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
4156 /* If NEXT_STMT accesses the same DR as the previous statement,
4157 put the same TMP_DATA_REF as its vectorized statement; otherwise
4158 get the next data-ref from RESULT_CHAIN. */
4159 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4165 /* Function vect_force_dr_alignment_p.
4167 Returns whether the alignment of a DECL can be forced to be aligned
4168 on ALIGNMENT bit boundary. */
4171 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4173 if (TREE_CODE (decl) != VAR_DECL)
4176 if (DECL_EXTERNAL (decl))
4179 if (TREE_ASM_WRITTEN (decl))
4182 if (TREE_STATIC (decl))
4183 return (alignment <= MAX_OFILE_ALIGNMENT);
4185 return (alignment <= MAX_STACK_ALIGNMENT);
4189 /* Return whether the data reference DR is supported with respect to its
4191 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4192 it is aligned, i.e., check if it is possible to vectorize it with different
4195 enum dr_alignment_support
4196 vect_supportable_dr_alignment (struct data_reference *dr,
4197 bool check_aligned_accesses)
4199 gimple stmt = DR_STMT (dr);
4200 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4201 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4202 enum machine_mode mode = TYPE_MODE (vectype);
4203 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4204 struct loop *vect_loop = NULL;
4205 bool nested_in_vect_loop = false;
4207 if (aligned_access_p (dr) && !check_aligned_accesses)
4212 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4213 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4216 /* Possibly unaligned access. */
4218 /* We can choose between using the implicit realignment scheme (generating
4219 a misaligned_move stmt) and the explicit realignment scheme (generating
4220 aligned loads with a REALIGN_LOAD). There are two variants to the
4221 explicit realignment scheme: optimized, and unoptimized.
4222 We can optimize the realignment only if the step between consecutive
4223 vector loads is equal to the vector size. Since the vector memory
4224 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4225 is guaranteed that the misalignment amount remains the same throughout the
4226 execution of the vectorized loop. Therefore, we can create the
4227 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4228 at the loop preheader.
4230 However, in the case of outer-loop vectorization, when vectorizing a
4231 memory access in the inner-loop nested within the LOOP that is now being
4232 vectorized, while it is guaranteed that the misalignment of the
4233 vectorized memory access will remain the same in different outer-loop
4234 iterations, it is *not* guaranteed that is will remain the same throughout
4235 the execution of the inner-loop. This is because the inner-loop advances
4236 with the original scalar step (and not in steps of VS). If the inner-loop
4237 step happens to be a multiple of VS, then the misalignment remains fixed
4238 and we can use the optimized realignment scheme. For example:
4244 When vectorizing the i-loop in the above example, the step between
4245 consecutive vector loads is 1, and so the misalignment does not remain
4246 fixed across the execution of the inner-loop, and the realignment cannot
4247 be optimized (as illustrated in the following pseudo vectorized loop):
4249 for (i=0; i<N; i+=4)
4250 for (j=0; j<M; j++){
4251 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4252 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4253 // (assuming that we start from an aligned address).
4256 We therefore have to use the unoptimized realignment scheme:
4258 for (i=0; i<N; i+=4)
4259 for (j=k; j<M; j+=4)
4260 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4261 // that the misalignment of the initial address is
4264 The loop can then be vectorized as follows:
4266 for (k=0; k<4; k++){
4267 rt = get_realignment_token (&vp[k]);
4268 for (i=0; i<N; i+=4){
4270 for (j=k; j<M; j+=4){
4272 va = REALIGN_LOAD <v1,v2,rt>;
4279 if (DR_IS_READ (dr))
4281 bool is_packed = false;
4282 tree type = (TREE_TYPE (DR_REF (dr)));
4284 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
4285 && (!targetm.vectorize.builtin_mask_for_load
4286 || targetm.vectorize.builtin_mask_for_load ()))
4288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4289 if ((nested_in_vect_loop
4290 && (TREE_INT_CST_LOW (DR_STEP (dr))
4291 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4293 return dr_explicit_realign;
4295 return dr_explicit_realign_optimized;
4297 if (!known_alignment_for_access_p (dr))
4299 tree ba = DR_BASE_OBJECT (dr);
4302 is_packed = contains_packed_reference (ba);
4305 if (targetm.vectorize.
4306 support_vector_misalignment (mode, type,
4307 DR_MISALIGNMENT (dr), is_packed))
4308 /* Can't software pipeline the loads, but can at least do them. */
4309 return dr_unaligned_supported;
4313 bool is_packed = false;
4314 tree type = (TREE_TYPE (DR_REF (dr)));
4316 if (!known_alignment_for_access_p (dr))
4318 tree ba = DR_BASE_OBJECT (dr);
4321 is_packed = contains_packed_reference (ba);
4324 if (targetm.vectorize.
4325 support_vector_misalignment (mode, type,
4326 DR_MISALIGNMENT (dr), is_packed))
4327 return dr_unaligned_supported;
4331 return dr_unaligned_unsupported;