/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "basic-block.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"

/* Need to include rtl.h, expr.h, etc. for optabs.  */
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines
   the VF.  If the smallest datatype in the loop is present only as the rhs
   of a promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away
   by invariant motion.  However, we cannot rely on invariant motion to
   always take invariants out of the loop, and so in the case of promotion
   we also have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */
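/* An illustrative example (not from the original sources): in a loop such as

       short *a;  int *b;
       for (i = 0; i < N; i++)
         b[i] = (int) a[i];	# promotion from short to int

   the result type of the cast is 'int', but the smallest scalar type
   involved is 'short', and it is the short accesses that determine the
   vectorization factor.  */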
static tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
/* Find the place of the data-ref in STMT in the interleaving chain that
   starts from FIRST_STMT.  Return -1 if the data-ref is not a part of the
   chain.  */
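/* For instance (illustrative): for an interleaved group of stores a[4i],
   a[4i+1], a[4i+2], a[4i+3] whose chain starts at the a[4i] statement, the
   place of the a[4i+2] data-ref in the chain is 2.  */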
int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */
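/* The chain is kept sorted by DR_INIT, the constant offset of each access
   from the common base.  E.g. (illustrative, 4-byte elements): if DRB's
   chain covers inits {0, 4, 12} and DRA has init 8, DRA is linked in
   between the elements with inits 4 and 12.  */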
static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are part of a chain of interleaved
   data accesses, update the interleaving chain.  DRB's INIT is smaller
   than DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT(DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */
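/* E.g. (illustrative): for the accesses a[4i+1] (DRA) and a[4i] (DRB), if
   neither is chained yet, case 1 applies: DRB, having the smaller init,
   becomes FIRST_DR of the new chain and DRA becomes its next element.  */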
static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                        old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously
             marked as the first stmt of the interleaving chain of DRA.
             Therefore, we update FIRST_STMT and put DRB in the head of
             the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new
             FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain
         (don't start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */
static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
    return false;

  res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                            TREE_OPERAND (offset2, 0));

  if (!res || !BINARY_CLASS_P (offset1))
    return res;

  res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                            TREE_OPERAND (offset2, 1));

  return res;
}
/* Check dependence between DRA and DRB for basic block vectorization.
   If the accesses share the same bases and offsets, we can compare their
   initial constant offsets to decide whether they differ or not.  In case
   of a read-write dependence we check that the load is before the store to
   ensure that vectorization will not change the order of the accesses.  */
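/* E.g. (illustrative), in a basic block containing

       x = a[0];	# load
       a[0] = y;	# store to the same location

   the load comes before the store, so the accesses can still be
   vectorized; in the opposite order they cannot.  */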
static bool
vect_drs_dependent_in_basic_block (struct data_reference *dra,
                                   struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* We only call this function for pairs of loads and stores, but we
     verify it here.  */
  if (DR_IS_READ (dra) == DR_IS_READ (drb))
    {
      if (DR_IS_READ (dra))
        return false;
      else
        return true;
    }

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb)))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the
     store.  When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved
     in case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are,
   insert DRA and DRB in an interleaving chain.  */
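/* E.g. (illustrative): the accesses a[2i] (DRA) and a[2i+1] (DRB) have the
   same base and offset, the same step 2 * sizeof (*a), and inits that
   differ by less than the step, so they form an interleaving group of
   size 2.  */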
static bool
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and
     they are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return false;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference
        between the data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return false;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an
         interleaving, and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if (step && (init_a - init_b) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if (step && (init_b - init_a) > step)
        return false;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return true;
        }
    }

  return false;
}
/* Check if the data references pointed to by DR_I and DR_J are the same or
   belong to the same interleaving group.  Return FALSE if the drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}
/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */
static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump,
                 "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */
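/* E.g. (illustrative): in

       for (i = 0; i < N; i++)
         a[i+2] = a[i] + 1;

   the dependence distance is 2, so *MAX_VF is reduced to 2 - at most two
   elements may be processed per vector iteration without violating the
   dependence.  */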
static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf,
                                  bool *data_dependence_in_bb)
{
  unsigned int i;
  struct loop *loop = NULL;
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* Don't bother to analyze statements marked as unvectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (loop_vinfo)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "versioning for alias required: "
                       "can't determine dependence between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* Add to list of ddrs that need to be tested at run-time.  */
          return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
        }

      /* When vectorizing a basic block unknown dependence can still mean
         interleaving.  */
      if (vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* We deal with read-write dependencies in basic blocks later (by
         verifying that all the loads in the basic block are before all the
         stores).  */
      *data_dependence_in_bb = true;
      return false;
    }

  /* Versioning for alias is not yet supported for basic block SLP, and
     dependence distance is inapplicable, hence, in case of known data
     dependence, basic block vectorization is impossible for now.  */
  if (!loop_vinfo)
    {
      if (dra != drb && vect_check_interleaving (dra, drb))
        return false;

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "determined dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      /* Do not vectorize basic blocks with write-write dependences.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check if this dependence is allowed in basic block
         vectorization.  */
      return vect_drs_dependent_in_basic_block (dra, drb);
    }

  /* Loop-based vectorization and known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: "
                   "bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      if (dist == 0)
        {
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency
             if necessary.  We check before that one of the data-refs is a
             store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance negative.");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump,
                     "adjusting maximal vectorization factor to %i",
                     *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump, "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
                                   bb_vec_info bb_vinfo, int *max_vf,
                                   bool *data_dependence_in_bb)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = NULL;
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  if (loop_vinfo)
    ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  else
    ddrs = BB_VINFO_DDRS (bb_vinfo);

  FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf,
                                          data_dependence_in_bb))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data
      reference cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */
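/* E.g. (illustrative): for a data-ref whose base address is known to be
   16-byte aligned, with offset 0 and init 4, and a 16-byte vectype, the
   computed misalignment is 4 bytes.  */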
static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     divisible by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump,
                     "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype) / BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "force alignment of ");
          print_generic_expr (vect_dump, ref, TDF_SLIM);
        }

      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ",
               DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
          }
        else
          return false;
      }

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */
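/* E.g. (illustrative): in a loop with a 16-byte vectype, if DR has known
   misalignment 12 and element size dr_size, peeling NPEEL iterations to
   align DR_PEEL leaves DR with misalignment (12 + NPEEL * dr_size) % 16,
   matching the update below.  */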
static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied
     by the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */
bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  VEC (data_reference_p, heap) *datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
           && DR_GROUP_FIRST_DR (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");

              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */
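/* E.g. (illustrative): with a V4SI vector of four 4-byte ints and a known
   misalignment of 8 bytes, peeling two scalar iterations aligns the
   access; a misalignment of 2 bytes could never be fixed by peeling whole
   elements.  */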
static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump,
                     "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d",
                 is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  if (!supportable_dr_alignment)
    *inside_cost = VECT_MAX_COST;
  else
    {
      if (DR_IS_READ (dr))
        vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost);
      else
        vect_get_store_cost (dr, ncopies, inside_cost);
    }

  if (vect_print_dump_info (REPORT_COST))
    fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
             "outside_cost = %d.", *inside_cost, *outside_cost);
}
static hashval_t
vect_peeling_hash (const void *elem)
{
  const struct _vect_peel_info *peel_info;

  peel_info = (const struct _vect_peel_info *) elem;
  return (hashval_t) peel_info->npeel;
}

static int
vect_peeling_hash_eq (const void *elem1, const void *elem2)
{
  const struct _vect_peel_info *a, *b;

  a = (const struct _vect_peel_info *) elem1;
  b = (const struct _vect_peel_info *) elem2;
  return (a->npeel == b->npeel);
}

/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  void **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                                     &elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
                                 INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && !flag_vect_cost_model)
    slot->count += VECT_MAX_COST;
}
/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

static int
vect_peeling_hash_get_most_frequent (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info max = (vect_peel_extended_info) data;

  if (elem->count > max->peel_info.count)
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}
/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

static int
vect_peeling_hash_get_lowest_cost (void **slot, void *data)
{
  vect_peel_info elem = (vect_peel_info) *slot;
  vect_peel_extended_info min = (vect_peel_extended_info) data;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
                         &dummy,
                         vect_get_single_scalar_iteraion_cost (loop_vinfo));

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost
          && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }

  return 1;
}
/* Choose the best peeling option by traversing the peeling hash table and
   either choosing an option with the lowest cost (if cost model is enabled)
   or the option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;

  if (flag_vect_cost_model)
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_lowest_cost, &res);
    }
  else
    {
      res.peel_info.count = 0;
      htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
                     vect_peeling_hash_get_most_frequent, &res);
    }

  *npeel = res.peel_info.npeel;
  return res.peel_info.dr;
}
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') which alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

   -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

   -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

   -- Possibility 1: we do loop versioning:
	if (p is aligned) {
	  for (i=0; i<N; i++){		# loop 1A
	    x = q[i];			# DR_MISALIGNMENT(q) = 3
	    p[i] = y;			# DR_MISALIGNMENT(p) = 0
	  }
	}
	else {
	  for (i=0; i<N; i++){		# loop 1B
	    x = q[i];			# DR_MISALIGNMENT(q) = 3
	    p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	  }
	}

   -- Possibility 2: we do loop peeling:
	for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	  x = q[i];
	  p[i] = y;
	}
	for (i = 3; i < N; i++){	# loop 2A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

   -- Possibility 3: combination of loop peeling and versioning:
	for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	  x = q[i];
	  p[i] = y;
	}
	if (p is aligned) {
	  for (i = 3; i<N; i++){	# loop 3A
	    x = q[i];			# DR_MISALIGNMENT(q) = 0
	    p[i] = y;			# DR_MISALIGNMENT(p) = 0
	  }
	}
	else {
	  for (i = 3; i<N; i++){	# loop 3B
	    x = q[i];			# DR_MISALIGNMENT(q) = 0
	    p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	  }
	}

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high
     level view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy
        vect_supportable_dr_alignment then see if loop versioning checks
        can be used to make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     C) If neither peeling nor versioning were successful then return false
        if any data reference does not satisfy
        vect_supportable_dr_alignment.

     D) Return true (all data references satisfy
        vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together)
     is not being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to
     peel.  Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */
  FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
                LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
                          htab_create (1, vect_peeling_hash,
                                       vect_peeling_hash_eq, free);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (nelements - mis) % vf;

              /* For multiple types, it is possible that the bigger type
                 access will have more than one peeling option.  E.g., a
                 loop with two types: one of size (vector size / 4), and
                 the other one of size (vector size / 8).  The vectorization
                 factor will be 8.  If both accesses are misaligned by 3,
                 the first one needs one scalar iteration to be aligned,
                 and the second one needs 5.  But the first one will be
                 aligned also by peeling 5 scalar iterations, and in that
                 case both accesses will be aligned.  Hence, except for the
                 immediate peeling amount, we also want to try to add full
                 vector size, while we don't exceed the vectorization
                 factor.
                 We do this automatically for the cost model, since we
                 calculate the cost for every peeling option.  */
              if (!flag_vect_cost_model)
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some
                 other access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (!flag_vect_cost_model)
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since
                 we have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know all the misalignment values, we prefer
                 peeling for the data-ref that has the maximum number of
                 data-refs with the same alignment, unless the target
                 prefers to align stores over loads.  */
              if (all_misalignments_unknown)
                {
                  if (same_align_drs_max < VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info)))
                    {
                      same_align_drs_max = VEC_length (dr_p,
                                     STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
                      dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in
                 the loop, we choose peeling amount according to the known
                 misaligned accesses.  */
              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump,
                         "vector alignment may not be reachable");
              break;
            }
        }
    }
  vect_versioning_for_alias_required
    = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling && all_misalignments_unknown
      && vect_supportable_dr_alignment (dr0, false))
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      if (first_store && DR_IS_READ (dr0))
        {
          unsigned int load_inside_cost = 0, load_outside_cost = 0;
          unsigned int store_inside_cost = 0, store_outside_cost = 0;
          unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
          unsigned int store_inside_penalty = 0, store_outside_penalty = 0;

          vect_get_data_access_cost (dr0, &load_inside_cost,
                                     &load_outside_cost);
          vect_get_data_access_cost (first_store, &store_inside_cost,
                                     &store_outside_cost);

          /* Calculate the penalty for leaving FIRST_STORE unaligned (by
             aligning the load DR0).  */
          load_inside_penalty = store_inside_cost;
          load_outside_penalty = store_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (first_store))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                load_inside_penalty += load_inside_cost;
                load_outside_penalty += load_outside_cost;
              }
            else
              {
                load_inside_penalty += store_inside_cost;
                load_outside_penalty += store_outside_cost;
              }

          /* Calculate the penalty for leaving DR0 unaligned (by
             aligning the FIRST_STORE).  */
          store_inside_penalty = load_inside_cost;
          store_outside_penalty = load_outside_cost;
          for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                   (vinfo_for_stmt (DR_STMT (dr0))),
                                   i, dr);
               i++)
            if (DR_IS_READ (dr))
              {
                store_inside_penalty += load_inside_cost;
                store_outside_penalty += load_outside_cost;
              }
            else
              {
                store_inside_penalty += store_inside_cost;
                store_outside_penalty += store_outside_cost;
              }

          if (load_inside_penalty > store_inside_penalty
              || (load_inside_penalty == store_inside_penalty
                  && load_outside_penalty > store_outside_penalty))
            dr0 = first_store;
        }

      /* In case there are only loads with different unknown misalignments,
         use peeling only if it may help to align other accesses in the
         loop.  */
      if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
                                       (vinfo_for_stmt (DR_STMT (dr0))))
          && vect_supportable_dr_alignment (dr0, false)
             != dr_unaligned_supported)
        do_peeling = false;
    }

  if (do_peeling && !dr0)
    {
      /* Peeling is possible, but there is no data access that is not
         supported unless aligned.  So we try to choose the best possible
         peeling.  */

      /* We should get here only if there are drs with known
         misalignment.  */
      gcc_assert (!all_misalignments_unknown);

      /* Choose the best peeling from the hash table.  */
      dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel);
      if (!dr0 || !npeel)
        do_peeling = false;
    }
  if (do_peeling)
    {
      stmt = DR_STMT (dr0);
      stmt_info = vinfo_for_stmt (stmt);
      vectype = STMT_VINFO_VECTYPE (stmt_info);
      nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use
                 in updating DR_MISALIGNMENT values.  The peeling factor is
                 the vectorization factor minus the misalignment as an
                 element count.  */
              mis = DR_MISALIGNMENT (dr0);
              mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
              npeel = nelements - mis;
            }

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of
             iterations by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment
            = vect_supportable_dr_alignment (dr, false);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
        {
          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          if (!stat)
            do_peeling = false;
          else
            return stat;
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then
             set DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of
             DR_i and dr0 are known at compile time then increment
             DR_MISALIGNMENT (DR_i) by the peeling factor times the element
             size of DR_i (MOD the vectorization factor times the size).
             Otherwise, the misalignment of DR_i must be set to unknown.  */
          FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          if (npeel)
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
              = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump,
                     "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
          gcc_assert (stat);
          return stat;
        }
    }
  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an
        unknown misalignment, and
     4) all misaligned data refs with a known misalignment are supported,
        and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
        flag_tree_vect_loop_version
        && optimize_loop_nest_for_speed_p (loop)
        && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment
            = vect_supportable_dr_alignment (dr, false);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (gimple,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (gimple, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             stmt);
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(gimple,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump,
                     "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being
     done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
  return stat;
}
/* Function vect_find_same_alignment_drs.

   Update group and alignment relations according to the chosen
   vectorization factor.  */
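/* E.g. (illustrative): with 4-byte elements and a vectorization factor of
   4, the accesses a[i] and a[i+4] are 16 bytes apart - a whole number of
   vectors - so the two references always have the same misalignment.  */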
1909 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1910 loop_vec_info loop_vinfo)
1913 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1914 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1915 struct data_reference *dra = DDR_A (ddr);
1916 struct data_reference *drb = DDR_B (ddr);
1917 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1918 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1919 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1920 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1921 lambda_vector dist_v;
1922 unsigned int loop_depth;
1924 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1930 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1933 /* Loop-based vectorization and known data dependence. */
1934 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1937 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1938 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
1940 int dist = dist_v[loop_depth];
1942 if (vect_print_dump_info (REPORT_DR_DETAILS))
1943 fprintf (vect_dump, "dependence distance = %d.", dist);
1945 /* Same loop iteration.  */
1946 if (dist == 0
1947 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1949 /* Two references with distance zero have the same alignment. */
1950 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1951 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1952 if (vect_print_dump_info (REPORT_ALIGNMENT))
1953 fprintf (vect_dump, "accesses have the same alignment.");
1954 if (vect_print_dump_info (REPORT_DR_DETAILS))
1956 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1957 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1958 fprintf (vect_dump, " and ");
1959 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1966 /* Function vect_analyze_data_refs_alignment
1968 Analyze the alignment of the data-references in the loop.
1969 Return FALSE if a data reference is found that cannot be vectorized. */
1972 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1973 bb_vec_info bb_vinfo)
1975 if (vect_print_dump_info (REPORT_DETAILS))
1976 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1978 /* Mark groups of data references with the same alignment using
1979 data dependence information. */
1982 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1983 struct data_dependence_relation *ddr;
1986 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
1987 vect_find_same_alignment_drs (ddr, loop_vinfo);
1990 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1992 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1994 "not vectorized: can't calculate alignment for data ref.");
2002 /* Analyze groups of strided accesses: check that DR belongs to a group of
2003 strided accesses of legal size, step, etc. Detect gaps, single element
2004 interleaving, and other special cases. Set strided access info.
2005 Collect groups of strided stores for further use in SLP analysis. */
2008 vect_analyze_group_access (struct data_reference *dr)
2010 tree step = DR_STEP (dr);
2011 tree scalar_type = TREE_TYPE (DR_REF (dr));
2012 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2013 gimple stmt = DR_STMT (dr);
2014 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2015 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2016 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2017 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2018 HOST_WIDE_INT stride;
2019 bool slp_impossible = false;
2021 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
2022 interleaving group (including gaps). */
2023 stride = dr_step / type_size;
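/* E.g., for 4-byte elements accessed with a step of 16 bytes,
   STRIDE = 16 / 4 = 4 elements per scalar iteration.  */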
2025 /* A non-consecutive access is possible only if it is part of an interleaving group.  */
2026 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
2028 /* Check whether this DR is part of an interleaving group, and is a single
2029 element of the group that is accessed in the loop.  */
2031 /* Gaps are supported only for loads. STEP must be a multiple of the type
2032 size. The size of the group must be a power of 2. */
2034 && (dr_step % type_size) == 0
2036 && exact_log2 (stride) != -1)
2038 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
2039 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2040 if (vect_print_dump_info (REPORT_DR_DETAILS))
2042 fprintf (vect_dump, "Detected single element interleaving ");
2043 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2044 fprintf (vect_dump, " step ");
2045 print_generic_expr (vect_dump, step, TDF_SLIM);
2050 if (vect_print_dump_info (REPORT_DETAILS))
2052 fprintf (vect_dump, "not consecutive access ");
2053 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2058 /* Mark the statement as unvectorizable. */
2059 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2066 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
2068 /* First stmt in the interleaving chain. Check the chain. */
2069 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
2070 struct data_reference *data_ref = dr;
2071 unsigned int count = 1;
2073 tree prev_init = DR_INIT (data_ref);
2075 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
2079 /* Skip same data-refs.  If two or more stmts share the same data-ref
2080 (supported only for loads), we vectorize only the first stmt, and
2081 the rest get their vectorized loads from the first one.  */
2083 if (!tree_int_cst_compare (DR_INIT (data_ref),
2084 DR_INIT (STMT_VINFO_DATA_REF (
2085 vinfo_for_stmt (next)))))
2087 if (DR_IS_WRITE (data_ref))
2089 if (vect_print_dump_info (REPORT_DETAILS))
2090 fprintf (vect_dump, "Two store stmts share the same dr.");
2094 /* Check that there are no load-store dependences for these loads,
2095 to prevent a load-store-load sequence to the same location.  */
2096 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2097 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
2099 if (vect_print_dump_info (REPORT_DETAILS))
2101 "READ_WRITE dependence in interleaving.");
2105 /* For loads, record that this stmt reuses the data-ref load of PREV.  */
2106 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
2109 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2114 /* Check that all the accesses have the same STEP. */
2115 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2116 if (tree_int_cst_compare (step, next_step))
2118 if (vect_print_dump_info (REPORT_DETAILS))
2119 fprintf (vect_dump, "not consecutive access in interleaving");
2123 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2124 /* Check that the distance between two accesses is equal to the type
2125 size. Otherwise, we have gaps. */
2126 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2127 - TREE_INT_CST_LOW (prev_init)) / type_size;
2130 /* FORNOW: SLP of accesses with gaps is not supported. */
2131 slp_impossible = true;
2132 if (DR_IS_WRITE (data_ref))
2134 if (vect_print_dump_info (REPORT_DETAILS))
2135 fprintf (vect_dump, "interleaved store with gaps");
2142 /* Store the gap from the previous member of the group. If there is no
2143 gap in the access, DR_GROUP_GAP is always 1. */
2144 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
2146 prev_init = DR_INIT (data_ref);
2147 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
2148 /* Count the number of data-refs in the chain. */
2152 /* COUNT is the number of accesses found; we multiply it by the size of
2153 the type to get COUNT_IN_BYTES.  */
2154 count_in_bytes = type_size * count;
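/* E.g., three 4-byte members in a group whose step is 8 bytes give
   COUNT_IN_BYTES = 12 > 8, i.e. the groups of adjacent scalar iterations
   would overlap; such a group is rejected just below.  */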
2156 /* Check that the size of the interleaving (including gaps) is not
2157 greater than STEP. */
2158 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
2160 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "interleaving size is greater than step for ");
2163 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2168 /* Check that the size of the interleaving is equal to STEP for stores,
2169 i.e., that there are no gaps. */
2170 if (dr_step && dr_step != count_in_bytes)
2172 if (DR_IS_READ (dr))
2174 slp_impossible = true;
2175 /* There is a gap after the last load in the group.  This gap is the
2176 difference between the stride and the number of elements.  When
2177 there is no gap, this difference should be 0.  */
2178 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
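/* E.g., two loads a[4*i] and a[4*i+1] with step 4: STRIDE = 4 and
   COUNT = 2, so a gap of 4 - 2 = 2 elements follows the last member.  */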
2182 if (vect_print_dump_info (REPORT_DETAILS))
2183 fprintf (vect_dump, "interleaved store with gaps");
2188 /* Check that STEP is a multiple of type size. */
2189 if (dr_step && (dr_step % type_size) != 0)
2191 if (vect_print_dump_info (REPORT_DETAILS))
2193 fprintf (vect_dump, "step is not a multiple of type size: step ");
2194 print_generic_expr (vect_dump, step, TDF_SLIM);
2195 fprintf (vect_dump, " size ");
2196 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2202 /* FORNOW: we handle only interleaving that is a power of 2.
2203 We don't fail here if it may still be possible to vectorize the
2204 group using SLP.  If not, the size of the group will be checked in
2205 vect_analyze_operations, and the vectorization will fail.  */
2206 if (exact_log2 (stride) == -1)
2208 if (vect_print_dump_info (REPORT_DETAILS))
2209 fprintf (vect_dump, "interleaving is not a power of 2");
2218 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
2219 if (vect_print_dump_info (REPORT_DETAILS))
2220 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
2222 /* SLP: create an SLP data structure for every interleaving group of
2223 stores for further analysis in vect_analyze_slp.  */
2224 if (DR_IS_WRITE (dr) && !slp_impossible)
2227 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
2230 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
2239 /* Analyze the access pattern of the data-reference DR.
2240 For non-consecutive accesses, call vect_analyze_group_access() to
2241 analyze groups of strided accesses.  */
2244 vect_analyze_data_ref_access (struct data_reference *dr)
2246 tree step = DR_STEP (dr);
2247 tree scalar_type = TREE_TYPE (DR_REF (dr));
2248 gimple stmt = DR_STMT (dr);
2249 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2250 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2251 struct loop *loop = NULL;
2252 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2255 loop = LOOP_VINFO_LOOP (loop_vinfo);
2257 if (loop_vinfo && !step)
2259 if (vect_print_dump_info (REPORT_DETAILS))
2260 fprintf (vect_dump, "bad data-ref access in loop");
2264 /* Don't allow invariant accesses in loops. */
2265 if (loop_vinfo && dr_step == 0)
2268 if (loop && nested_in_vect_loop_p (loop, stmt))
2270 /* Interleaved accesses are not yet supported within outer-loop
2271 vectorization for references in the inner-loop. */
2272 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2274 /* For the rest of the analysis we use the outer-loop step. */
2275 step = STMT_VINFO_DR_STEP (stmt_info);
2276 dr_step = TREE_INT_CST_LOW (step);
2280 if (vect_print_dump_info (REPORT_ALIGNMENT))
2281 fprintf (vect_dump, "zero step in outer loop.");
2282 if (DR_IS_READ (dr))
2290 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2292 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2294 /* Mark that it is not interleaving. */
2295 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
2299 if (loop && nested_in_vect_loop_p (loop, stmt))
2301 if (vect_print_dump_info (REPORT_ALIGNMENT))
2302 fprintf (vect_dump, "strided access in outer loop.");
2306 /* Not a consecutive access - check whether it is part of an interleaving group.  */
2307 return vect_analyze_group_access (dr);
2311 /* Function vect_analyze_data_ref_accesses.
2313 Analyze the access pattern of all the data references in the loop.
2315 FORNOW: the only access pattern that is considered vectorizable is a
2316 simple step 1 (consecutive) access.
2318 FORNOW: handle only arrays and pointer accesses. */
2321 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
2324 VEC (data_reference_p, heap) *datarefs;
2325 struct data_reference *dr;
2327 if (vect_print_dump_info (REPORT_DETAILS))
2328 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2331 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2333 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2335 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2336 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2337 && !vect_analyze_data_ref_access (dr))
2339 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2340 fprintf (vect_dump, "not vectorized: complicated access pattern.");
2344 /* Mark the statement as not vectorizable. */
2345 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2355 /* Function vect_prune_runtime_alias_test_list.
2357 Prune a list of ddrs to be tested at run-time by versioning for alias.
2358 Return FALSE if the resulting list of ddrs is longer than allowed by
2359 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
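/* E.g., if two ddrs would test the same pair of address ranges at run
   time, a single overlap check suffices; the vect_vfa_range_equal test
   below detects such duplicates so one of them can be dropped.  */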
2362 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2364 VEC (ddr_p, heap) * ddrs =
2365 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2368 if (vect_print_dump_info (REPORT_DETAILS))
2369 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2371 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2376 ddr_i = VEC_index (ddr_p, ddrs, i);
2379 for (j = 0; j < i; j++)
2381 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2383 if (vect_vfa_range_equal (ddr_i, ddr_j))
2385 if (vect_print_dump_info (REPORT_DR_DETAILS))
2387 fprintf (vect_dump, "found equal ranges ");
2388 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2389 fprintf (vect_dump, ", ");
2390 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2391 fprintf (vect_dump, " and ");
2392 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2393 fprintf (vect_dump, ", ");
2394 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2403 VEC_ordered_remove (ddr_p, ddrs, i);
2409 if (VEC_length (ddr_p, ddrs) >
2410 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2412 if (vect_print_dump_info (REPORT_DR_DETAILS))
2415 "disable versioning for alias - max number of generated "
2416 "checks exceeded.");
2419 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2428 /* Function vect_analyze_data_refs.
2430 Find all the data references in the loop or basic block.
2432 The general structure of the analysis of data refs in the vectorizer is as
2433 follows:
2434 1- vect_analyze_data_refs(loop/bb): call
2435 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2436 in the loop/bb and their dependences.
2437 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2438 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2439 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2444 vect_analyze_data_refs (loop_vec_info loop_vinfo,
2445 bb_vec_info bb_vinfo,
2448 struct loop *loop = NULL;
2449 basic_block bb = NULL;
2451 VEC (data_reference_p, heap) *datarefs;
2452 struct data_reference *dr;
2456 if (vect_print_dump_info (REPORT_DETAILS))
2457 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
2461 loop = LOOP_VINFO_LOOP (loop_vinfo);
2462 res = compute_data_dependences_for_loop
2463 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2464 &LOOP_VINFO_DDRS (loop_vinfo));
2468 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2469 fprintf (vect_dump, "not vectorized: loop contains function calls"
2470 " or data references that cannot be analyzed");
2474 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2478 bb = BB_VINFO_BB (bb_vinfo);
2479 res = compute_data_dependences_for_bb (bb, true,
2480 &BB_VINFO_DATAREFS (bb_vinfo),
2481 &BB_VINFO_DDRS (bb_vinfo));
2484 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2485 fprintf (vect_dump, "not vectorized: basic block contains function"
2486 " calls or data references that cannot be analyzed");
2490 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2493 /* Go through the data-refs and check that the analysis succeeded.  Update
2494 the pointer from the stmt_vec_info struct to DR and vectype.  */
2496 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
2499 stmt_vec_info stmt_info;
2500 tree base, offset, init;
2503 if (!dr || !DR_REF (dr))
2505 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2506 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2510 stmt = DR_STMT (dr);
2511 stmt_info = vinfo_for_stmt (stmt);
2513 /* Check that analysis of the data-ref succeeded. */
2514 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2517 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2519 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2520 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2525 /* Mark the statement as not vectorizable. */
2526 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2533 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2535 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2536 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2540 /* Mark the statement as not vectorizable. */
2541 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2548 base = unshare_expr (DR_BASE_ADDRESS (dr));
2549 offset = unshare_expr (DR_OFFSET (dr));
2550 init = unshare_expr (DR_INIT (dr));
2552 if (stmt_could_throw_p (stmt))
2554 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2556 fprintf (vect_dump, "not vectorized: statement can throw an "
2558 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2563 /* Update DR field in stmt_vec_info struct. */
2565 /* If the dataref is in an inner-loop of the loop that is considered
2566 for vectorization, we also want to analyze the access relative to
2567 the outer-loop (DR contains information only relative to the
2568 inner-most enclosing loop). We do that by building a reference to the
2569 first location accessed by the inner-loop, and analyze it relative to
2571 if (loop && nested_in_vect_loop_p (loop, stmt))
2573 tree outer_step, outer_base, outer_init;
2574 HOST_WIDE_INT pbitsize, pbitpos;
2576 enum machine_mode pmode;
2577 int punsignedp, pvolatilep;
2578 affine_iv base_iv, offset_iv;
2581 /* Build a reference to the first location accessed by the
2582 inner-loop: *(BASE+INIT). (The first location is actually
2583 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2584 tree inner_base = build_fold_indirect_ref
2585 (fold_build2 (POINTER_PLUS_EXPR,
2586 TREE_TYPE (base), base,
2587 fold_convert (sizetype, init)));
2589 if (vect_print_dump_info (REPORT_DETAILS))
2591 fprintf (vect_dump, "analyze in outer-loop: ");
2592 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2595 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2596 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2597 gcc_assert (outer_base != NULL_TREE);
2599 if (pbitpos % BITS_PER_UNIT != 0)
2601 if (vect_print_dump_info (REPORT_DETAILS))
2602 fprintf (vect_dump, "failed: bit offset alignment.\n");
2606 outer_base = build_fold_addr_expr (outer_base);
2607 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2610 if (vect_print_dump_info (REPORT_DETAILS))
2611 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2618 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2626 offset_iv.base = ssize_int (0);
2627 offset_iv.step = ssize_int (0);
2629 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2632 if (vect_print_dump_info (REPORT_DETAILS))
2633 fprintf (vect_dump, "evolution of offset is not affine.\n");
2637 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2638 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2639 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2640 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2641 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2643 outer_step = size_binop (PLUS_EXPR,
2644 fold_convert (ssizetype, base_iv.step),
2645 fold_convert (ssizetype, offset_iv.step));
2647 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2648 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2649 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2650 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2651 STMT_VINFO_DR_OFFSET (stmt_info) =
2652 fold_convert (ssizetype, offset_iv.base);
2653 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2654 size_int (highest_pow2_factor (offset_iv.base));
2656 if (vect_print_dump_info (REPORT_DETAILS))
2658 fprintf (vect_dump, "\touter base_address: ");
2659 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2660 fprintf (vect_dump, "\n\touter offset from base address: ");
2661 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2662 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2663 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2664 fprintf (vect_dump, "\n\touter step: ");
2665 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2666 fprintf (vect_dump, "\n\touter aligned to: ");
2667 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2671 if (STMT_VINFO_DATA_REF (stmt_info))
2673 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2676 "not vectorized: more than one data ref in stmt: ");
2677 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2682 STMT_VINFO_DATA_REF (stmt_info) = dr;
2684 /* Set vectype for STMT. */
2685 scalar_type = TREE_TYPE (DR_REF (dr));
2686 STMT_VINFO_VECTYPE (stmt_info) =
2687 get_vectype_for_scalar_type (scalar_type);
2688 if (!STMT_VINFO_VECTYPE (stmt_info))
2690 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2693 "not vectorized: no vectype for stmt: ");
2694 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2695 fprintf (vect_dump, " scalar_type: ");
2696 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2701 /* Mark the statement as not vectorizable. */
2702 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2709 /* Adjust the minimal vectorization factor according to the
2710 vector type.  */
2711 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2720 /* Function vect_get_new_vect_var.
2722 Returns a name for a new variable.  The current naming scheme prepends
2723 the prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2724 NAME, when NAME is provided, and uses the result as the name of the
2725 new vectorizer-generated variable.  */
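/* E.g., for NAME "x", vect_simple_var yields a variable named "vect_x"
   and vect_pointer_var a variable named "vect_px".  */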
2728 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2735 case vect_simple_var:
2738 case vect_scalar_var:
2741 case vect_pointer_var:
2750 char* tmp = concat (prefix, name, NULL);
2751 new_vect_var = create_tmp_var (type, tmp);
2755 new_vect_var = create_tmp_var (type, prefix);
2757 /* Mark a vector-typed variable as a gimple register variable.  */
2758 if (TREE_CODE (type) == VECTOR_TYPE)
2759 DECL_GIMPLE_REG_P (new_vect_var) = true;
2761 return new_vect_var;
2765 /* Function vect_create_addr_base_for_vector_ref.
2767 Create an expression that computes the address of the first memory location
2768 that will be accessed for a data reference.
2771 STMT: The statement containing the data reference.
2772 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2773 OFFSET: Optional.  If supplied, it is added to the initial address.
2774 LOOP: Specify relative to which loop-nest should the address be computed.
2775 For example, when the dataref is in an inner-loop nested in an
2776 outer-loop that is now being vectorized, LOOP can be either the
2777 outer-loop, or the inner-loop. The first memory location accessed
2778 by the following dataref ('in' points to short):
2780 for (i = 0; i < N; i++)
2781 for (j = 0; j < M; j++)
2782 s += in[i+j]
2784 is as follows:
2785 if LOOP=i_loop: &in (relative to i_loop)
2786 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2789 1. Return an SSA_NAME whose value is the address of the memory location of
2790 the first vector of the data reference.
2791 2. If new_stmt_list is not NULL_TREE after return, then the caller must
2792 insert these statement(s), which define the returned SSA_NAME.
2794 FORNOW: We are only handling array accesses with step 1. */
2797 vect_create_addr_base_for_vector_ref (gimple stmt,
2798 gimple_seq *new_stmt_list,
2802 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2803 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2804 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2806 tree data_ref_base_var;
2808 tree addr_base, addr_expr;
2810 gimple_seq seq = NULL;
2811 tree base_offset = unshare_expr (DR_OFFSET (dr));
2812 tree init = unshare_expr (DR_INIT (dr));
2814 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2815 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2818 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2820 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2822 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2824 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2825 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2826 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2830 base_name = build_fold_indirect_ref (data_ref_base);
2833 base_offset = ssize_int (0);
2834 init = ssize_int (0);
2835 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2838 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2839 add_referenced_var (data_ref_base_var);
2840 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2842 gimple_seq_add_seq (new_stmt_list, seq);
2844 /* Create base_offset */
2845 base_offset = size_binop (PLUS_EXPR,
2846 fold_convert (sizetype, base_offset),
2847 fold_convert (sizetype, init));
2848 dest = create_tmp_var (sizetype, "base_off");
2849 add_referenced_var (dest);
2850 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2851 gimple_seq_add_seq (new_stmt_list, seq);
2855 tree tmp = create_tmp_var (sizetype, "offset");
2857 add_referenced_var (tmp);
2858 offset = fold_build2 (MULT_EXPR, sizetype,
2859 fold_convert (sizetype, offset), step);
2860 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2861 base_offset, offset);
2862 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2863 gimple_seq_add_seq (new_stmt_list, seq);
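/* E.g., an OFFSET of 4 elements of a 2-byte type is scaled by STEP to
   8 bytes before being folded into BASE_OFFSET.  */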
2866 /* base + base_offset */
2868 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2869 data_ref_base, base_offset);
2872 addr_base = build1 (ADDR_EXPR,
2873 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2874 unshare_expr (DR_REF (dr)));
2877 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2878 base = get_base_address (DR_REF (dr));
2880 && TREE_CODE (base) == MEM_REF)
2882 = build_qualified_type (vect_ptr_type,
2883 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2885 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2886 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2887 get_name (base_name));
2888 add_referenced_var (addr_expr);
2889 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2890 gimple_seq_add_seq (new_stmt_list, seq);
2892 if (DR_PTR_INFO (dr)
2893 && TREE_CODE (vec_stmt) == SSA_NAME)
2894 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
2896 if (vect_print_dump_info (REPORT_DETAILS))
2898 fprintf (vect_dump, "created ");
2899 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2906 /* Function vect_create_data_ref_ptr.
2908 Create a new pointer to vector type (vp), which points to the first location
2909 accessed in the loop by STMT, along with the def-use update chain to
2910 appropriately advance the pointer through the loop iterations. Also set
2911 aliasing information for the pointer. This vector pointer is used by the
2912 callers to this function to create a memory reference expression for vector
2913 load/store access.
2915 Input:
2916 1. STMT: a stmt that references memory. Expected to be of the form
2917 GIMPLE_ASSIGN <name, data-ref> or
2918 GIMPLE_ASSIGN <data-ref, name>.
2919 2. AT_LOOP: the loop where the vector memref is to be created.
2920 3. OFFSET (optional): an offset to be added to the initial address accessed
2921 by the data-ref in STMT.
2922 4. ONLY_INIT: indicates whether vp is to be updated in the loop, or remain
2923 pointing to the initial address.
2924 5. TYPE: if not NULL indicates the required type of the data-ref.
2926 Output:
2927 1. Declare a new ptr to vector_type, and have it point to the base of the
2928 data reference (the initial address accessed by the data reference).
2929 For example, for vector of type V8HI, the following code is generated:
2932 vp = (v8hi *)initial_address;
2934 if OFFSET is not supplied:
2935 initial_address = &a[init];
2936 if OFFSET is supplied:
2937 initial_address = &a[init + OFFSET];
2939 Return the initial_address in INITIAL_ADDRESS.
2941 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2942 update the pointer in each iteration of the loop.
2944 Return the increment stmt that updates the pointer in PTR_INCR.
2946 3. Set INV_P to true if the access pattern of the data reference in the
2947 vectorized loop is invariant. Set it to false otherwise.
2949 4. Return the pointer. */
2952 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2953 tree offset, tree *initial_address, gimple *ptr_incr,
2954 bool only_init, bool *inv_p)
2957 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2958 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2959 struct loop *loop = NULL;
2960 bool nested_in_vect_loop = false;
2961 struct loop *containing_loop = NULL;
2962 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2967 gimple_seq new_stmt_list = NULL;
2971 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2973 gimple_stmt_iterator incr_gsi;
2976 tree indx_before_incr, indx_after_incr;
2979 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2980 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2985 loop = LOOP_VINFO_LOOP (loop_vinfo);
2986 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2987 containing_loop = (gimple_bb (stmt))->loop_father;
2988 pe = loop_preheader_edge (loop);
2992 gcc_assert (bb_vinfo);
2997 /* Check the step (evolution) of the load in LOOP, and record
2998 whether it's invariant. */
2999 if (nested_in_vect_loop)
3000 step = STMT_VINFO_DR_STEP (stmt_info);
3002 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
3004 if (tree_int_cst_compare (step, size_zero_node) == 0)
3008 negative = tree_int_cst_compare (step, size_zero_node) < 0;
3010 /* Create an expression for the first address accessed by this load
3011 in LOOP.  */
3012 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3014 if (vect_print_dump_info (REPORT_DETAILS))
3016 tree data_ref_base = base_name;
3017 fprintf (vect_dump, "create vector-pointer variable to type: ");
3018 print_generic_expr (vect_dump, vectype, TDF_SLIM);
3019 if (TREE_CODE (data_ref_base) == VAR_DECL
3020 || TREE_CODE (data_ref_base) == ARRAY_REF)
3021 fprintf (vect_dump, " vectorizing an array ref: ");
3022 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3023 fprintf (vect_dump, " vectorizing a record based array ref: ");
3024 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3025 fprintf (vect_dump, " vectorizing a pointer ref: ");
3026 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3029 /* (1) Create the new vector-pointer variable. */
3030 vect_ptr_type = build_pointer_type (vectype);
3031 base = get_base_address (DR_REF (dr));
3033 && TREE_CODE (base) == MEM_REF)
3035 = build_qualified_type (vect_ptr_type,
3036 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
3037 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3038 get_name (base_name));
3040 /* Vector types inherit the alias set of their component type by default so
3041 we need to use a ref-all pointer if the data reference does not conflict
3042 with the created vector data reference because it is not addressable. */
3043 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3044 get_alias_set (DR_REF (dr))))
3047 = build_pointer_type_for_mode (vectype,
3048 TYPE_MODE (vect_ptr_type), true);
3049 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3050 get_name (base_name));
3053 /* Likewise for any of the data references in the stmt group. */
3054 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
3056 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
3059 tree lhs = gimple_assign_lhs (orig_stmt);
3060 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
3061 get_alias_set (lhs)))
3064 = build_pointer_type_for_mode (vectype,
3065 TYPE_MODE (vect_ptr_type), true);
3067 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3068 get_name (base_name));
3072 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
3077 add_referenced_var (vect_ptr);
3079 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3080 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3081 def-use update cycles for the pointer: one relative to the outer-loop
3082 (LOOP), which is what steps (3) and (4) below do. The other is relative
3083 to the inner-loop (which is the inner-most loop containing the dataref),
3084 and this is done by step (5) below.
3086 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3087 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3088 redundant. Steps (3),(4) create the following:
3091 LOOP: vp1 = phi(vp0,vp2)
3097 If there is an inner-loop nested in loop, then step (5) will also be
3098 applied, and an additional update in the inner-loop will be created:
3101 LOOP: vp1 = phi(vp0,vp2)
3103 inner: vp3 = phi(vp1,vp4)
3104 vp4 = vp3 + inner_step
3110 /* (2) Calculate the initial address of the vector-pointer, and set
3111 the vector-pointer to point to it before the loop. */
3113 /* Create &(base[init_val+offset]) in the loop preheader.  */
3115 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3121 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3122 gcc_assert (!new_bb);
3125 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
3128 *initial_address = new_temp;
3130 /* Create: p = (vectype *) initial_base */
3131 if (TREE_CODE (new_temp) != SSA_NAME
3132 || !useless_type_conversion_p (vect_ptr_type, TREE_TYPE (new_temp)))
3134 vec_stmt = gimple_build_assign (vect_ptr,
3135 fold_convert (vect_ptr_type, new_temp));
3136 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
3137 /* Copy the points-to information if it exists. */
3138 if (DR_PTR_INFO (dr))
3139 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
3140 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
3143 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3144 gcc_assert (!new_bb);
3147 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
3150 vect_ptr_init = new_temp;
3152 /* (3) Handle the updating of the vector-pointer inside the loop.
3153 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3154 inner-loop nested in LOOP (during outer-loop vectorization). */
3156 /* No update in loop is required. */
3157 if (only_init && (!loop_vinfo || at_loop == loop))
3158 vptr = vect_ptr_init;
3161 /* The step of the vector pointer is the Vector Size. */
3162 tree step = TYPE_SIZE_UNIT (vectype);
3163 /* One exception to the above is when the scalar step of the load in
3164 LOOP is zero. In this case the step here is also zero. */
3166 step = size_zero_node;
3168 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3170 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3172 create_iv (vect_ptr_init,
3173 fold_convert (vect_ptr_type, step),
3174 vect_ptr, loop, &incr_gsi, insert_after,
3175 &indx_before_incr, &indx_after_incr);
3176 incr = gsi_stmt (incr_gsi);
3177 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3179 /* Copy the points-to information if it exists. */
3180 if (DR_PTR_INFO (dr))
3182 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3183 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3188 vptr = indx_before_incr;
3191 if (!nested_in_vect_loop || only_init)
3195 /* (4) Handle the updating of the vector-pointer inside the inner-loop
3196 nested in LOOP, if one exists.  */
3198 gcc_assert (nested_in_vect_loop);
3201 standard_iv_increment_position (containing_loop, &incr_gsi,
3203 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
3204 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3206 incr = gsi_stmt (incr_gsi);
3207 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
3209 /* Copy the points-to information if it exists. */
3210 if (DR_PTR_INFO (dr))
3212 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3213 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3218 return indx_before_incr;
3225 /* Function bump_vector_ptr
3227 Increment a pointer (to a vector type) by vector-size. If requested,
3228 i.e. if PTR_INCR is given, then also connect the new increment stmt
3229 to the existing def-use update-chain of the pointer, by modifying
3230 the PTR_INCR as illustrated below:
3232 The pointer def-use update-chain before this function:
3233 DATAREF_PTR = phi (p_0, p_2)
3235 PTR_INCR: p_2 = DATAREF_PTR + step
3237 The pointer def-use update-chain after this function:
3238 DATAREF_PTR = phi (p_0, p_2)
3240 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3242 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3245 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
3246 in the loop.
3247 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
3248 the loop.  The increment amount across iterations is expected
3249 to be vector_size.
3250 BSI - location where the new update stmt is to be placed.
3251 STMT - the original scalar memory-access stmt that is being vectorized.
3252 BUMP - optional. The offset by which to bump the pointer. If not given,
3253 the offset is assumed to be vector_size.
3255 Output: Return NEW_DATAREF_PTR as illustrated above.
3260 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3261 gimple stmt, tree bump)
3263 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3264 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3265 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3266 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3267 tree update = TYPE_SIZE_UNIT (vectype);
3270 use_operand_p use_p;
3271 tree new_dataref_ptr;
3276 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3277 dataref_ptr, update);
3278 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3279 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3280 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3282 /* Copy the points-to information if it exists. */
3283 if (DR_PTR_INFO (dr))
3284 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
3287 return new_dataref_ptr;
3289 /* Update the vector-pointer's cross-iteration increment. */
3290 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3292 tree use = USE_FROM_PTR (use_p);
3294 if (use == dataref_ptr)
3295 SET_USE (use_p, new_dataref_ptr);
3297 gcc_assert (tree_int_cst_compare (use, update) == 0);
3300 return new_dataref_ptr;
3304 /* Function vect_create_destination_var.
3306 Create a new temporary of type VECTYPE. */
3309 vect_create_destination_var (tree scalar_dest, tree vectype)
3312 const char *new_name;
3314 enum vect_var_kind kind;
3316 kind = vectype ? vect_simple_var : vect_scalar_var;
3317 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3319 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3321 new_name = get_name (scalar_dest);
3324 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3325 add_referenced_var (vec_dest);
3330 /* Function vect_strided_store_supported.
3332 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3333 and FALSE otherwise. */
3336 vect_strided_store_supported (tree vectype)
3338 optab interleave_high_optab, interleave_low_optab;
3339 enum machine_mode mode;
3341 mode = TYPE_MODE (vectype);
3343 /* Check that the operation is supported. */
3344 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3345 vectype, optab_default);
3346 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3347 vectype, optab_default);
3348 if (!interleave_high_optab || !interleave_low_optab)
3350 if (vect_print_dump_info (REPORT_DETAILS))
3351 fprintf (vect_dump, "no optab for interleave.");
3355 if (optab_handler (interleave_high_optab, mode) == CODE_FOR_nothing
3356 || optab_handler (interleave_low_optab, mode) == CODE_FOR_nothing)
3358 if (vect_print_dump_info (REPORT_DETAILS))
3359 fprintf (vect_dump, "interleave op not supported by target.");
3367 /* Function vect_permute_store_chain.
3369 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3370 a power of 2, generate interleave_high/low stmts to reorder the data
3371 correctly for the stores.  Return the final references for stores in
3372 RESULT_CHAIN.
3374 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3375 The input is 4 vectors each containing 8 elements. We assign a number to
3376 each element, the input sequence is:
3378 1st vec: 0 1 2 3 4 5 6 7
3379 2nd vec: 8 9 10 11 12 13 14 15
3380 3rd vec: 16 17 18 19 20 21 22 23
3381 4th vec: 24 25 26 27 28 29 30 31
3383 The output sequence should be:
3385 1st vec: 0 8 16 24 1 9 17 25
3386 2nd vec: 2 10 18 26 3 11 19 27
3387 3rd vec: 4 12 20 28 5 13 21 29
3388 4th vec: 6 14 22 30 7 15 23 31
3390 i.e., we interleave the contents of the four vectors in their order.
3392 We use interleave_high/low instructions to create such output. The input of
3393 each interleave_high/low operation is two vectors:
3394 1st vec    2nd vec
3395 0 1 2 3    4 5 6 7
3396 the even elements of the result vector are obtained left-to-right from the
3397 high/low elements of the first vector. The odd elements of the result are
3398 obtained left-to-right from the high/low elements of the second vector.
3399 The output of interleave_high will be: 0 4 1 5
3400 and of interleave_low: 2 6 3 7
3403 The permutation is done in log LENGTH stages. In each stage interleave_high
3404 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3405 where the first argument is taken from the first half of DR_CHAIN and the
3406 second argument from its second half.
3409 I1: interleave_high (1st vec, 3rd vec)
3410 I2: interleave_low (1st vec, 3rd vec)
3411 I3: interleave_high (2nd vec, 4th vec)
3412 I4: interleave_low (2nd vec, 4th vec)
3414 The output for the first stage is:
3416 I1: 0 16 1 17 2 18 3 19
3417 I2: 4 20 5 21 6 22 7 23
3418 I3: 8 24 9 25 10 26 11 27
3419 I4: 12 28 13 29 14 30 15 31
3421 The output of the second stage, i.e. the final result is:
3423 I1: 0 8 16 24 1 9 17 25
3424 I2: 2 10 18 26 3 11 19 27
3425 I3: 4 12 20 28 5 13 21 29
3426 I4: 6 14 22 30 7 15 23 31. */
3429 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3430 unsigned int length,
3432 gimple_stmt_iterator *gsi,
3433 VEC(tree,heap) **result_chain)
3435 tree perm_dest, vect1, vect2, high, low;
3437 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3440 enum tree_code high_code, low_code;
3442 /* Check that the operation is supported. */
3443 if (!vect_strided_store_supported (vectype))
3446 *result_chain = VEC_copy (tree, heap, dr_chain);
3448 for (i = 0; i < exact_log2 (length); i++)
3450 for (j = 0; j < length/2; j++)
3452 vect1 = VEC_index (tree, dr_chain, j);
3453 vect2 = VEC_index (tree, dr_chain, j+length/2);
3455 /* Create interleaving stmt:
3456 in the case of big endian:
3457 high = interleave_high (vect1, vect2)
3458 and in the case of little endian:
3459 high = interleave_low (vect1, vect2). */
3460 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3461 DECL_GIMPLE_REG_P (perm_dest) = 1;
3462 add_referenced_var (perm_dest);
3463 if (BYTES_BIG_ENDIAN)
3465 high_code = VEC_INTERLEAVE_HIGH_EXPR;
3466 low_code = VEC_INTERLEAVE_LOW_EXPR;
3470 low_code = VEC_INTERLEAVE_HIGH_EXPR;
3471 high_code = VEC_INTERLEAVE_LOW_EXPR;
3473 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
3475 high = make_ssa_name (perm_dest, perm_stmt);
3476 gimple_assign_set_lhs (perm_stmt, high);
3477 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3478 VEC_replace (tree, *result_chain, 2*j, high);
3480 /* Create interleaving stmt:
3481 in the case of big endian:
3482 low = interleave_low (vect1, vect2)
3483 and in the case of little endian:
3484 low = interleave_high (vect1, vect2). */
3485 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3486 DECL_GIMPLE_REG_P (perm_dest) = 1;
3487 add_referenced_var (perm_dest);
3488 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3490 low = make_ssa_name (perm_dest, perm_stmt);
3491 gimple_assign_set_lhs (perm_stmt, low);
3492 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3493 VEC_replace (tree, *result_chain, 2*j+1, low);
3495 dr_chain = VEC_copy (tree, heap, *result_chain);
3500 /* Function vect_setup_realignment
3502 This function is called when vectorizing an unaligned load using
3503 the dr_explicit_realign[_optimized] scheme.
3504 This function generates the following code at the loop prolog:
3507 x msq_init = *(floor(p)); # prolog load
3508 realignment_token = call target_builtin;
3510 x msq = phi (msq_init, ---)
3512 The stmts marked with x are generated only for the case of
3513 dr_explicit_realign_optimized.
3515 The code above sets up a new (vector) pointer, pointing to the first
3516 location accessed by STMT, and a "floor-aligned" load using that pointer.
3517 It also generates code to compute the "realignment-token" (if the relevant
3518 target hook was defined), and creates a phi-node at the loop-header bb
3519 whose arguments are the result of the prolog-load (created by this
3520 function) and the result of a load that takes place in the loop (to be
3521 created by the caller to this function).
3523 For the case of dr_explicit_realign_optimized:
3524 The caller to this function uses the phi-result (msq) to create the
3525 realignment code inside the loop, and sets up the missing phi argument,
3526 as follows:
3528 msq = phi (msq_init, lsq)
3529 lsq = *(floor(p')); # load in loop
3530 result = realign_load (msq, lsq, realignment_token);
3532 For the case of dr_explicit_realign:
3534 msq = *(floor(p)); # load in loop
3536 lsq = *(floor(p')); # load in loop
3537 result = realign_load (msq, lsq, realignment_token);
3540 STMT - (scalar) load stmt to be vectorized. This load accesses
3541 a memory location that may be unaligned.
3542 BSI - place where new code is to be inserted.
3543 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3544 is used.
3546 Output:
3547 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3548 target hook, if defined.
3549 Return value - the result of the loop-header phi node. */
3552 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3553 tree *realignment_token,
3554 enum dr_alignment_support alignment_support_scheme,
3556 struct loop **at_loop)
3558 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3559 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3560 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3561 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3562 struct loop *loop = NULL;
3564 tree scalar_dest = gimple_assign_lhs (stmt);
3571 tree msq_init = NULL_TREE;
3574 tree msq = NULL_TREE;
3575 gimple_seq stmts = NULL;
3577 bool compute_in_loop = false;
3578 bool nested_in_vect_loop = false;
3579 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3580 struct loop *loop_for_initial_load = NULL;
3584 loop = LOOP_VINFO_LOOP (loop_vinfo);
3585 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3588 gcc_assert (alignment_support_scheme == dr_explicit_realign
3589 || alignment_support_scheme == dr_explicit_realign_optimized);
3591 /* We need to generate three things:
3592 1. the misalignment computation
3593 2. the extra vector load (for the optimized realignment scheme).
3594 3. the phi node for the two vectors from which the realignment is
3595 done (for the optimized realignment scheme). */
3597 /* 1. Determine where to generate the misalignment computation.
3599 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3600 calculation will be generated by this function, outside the loop (in the
3601 preheader).  Otherwise, INIT_ADDR has already been computed for us by the
3602 caller, inside the loop.
3604 Background: If the misalignment remains fixed throughout the iterations of
3605 the loop, then both realignment schemes are applicable, and also the
3606 misalignment computation can be done outside LOOP. This is because we are
3607 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3608 are a multiple of VS (the Vector Size), and therefore the misalignment in
3609 different vectorized LOOP iterations is always the same.
3610 The problem arises only if the memory access is in an inner-loop nested
3611 inside LOOP, which is now being vectorized using outer-loop vectorization.
3612 This is the only case when the misalignment of the memory access may not
3613 remain fixed throughout the iterations of the inner-loop (as explained in
3614 detail in vect_supportable_dr_alignment). In this case, not only is the
3615 optimized realignment scheme not applicable, but also the misalignment
3616 computation (and generation of the realignment token that is passed to
3617 REALIGN_LOAD) has to be done inside the loop.
3619 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3620 or not, which in turn determines if the misalignment is computed inside
3621 the inner-loop, or outside LOOP. */
3623 if (init_addr != NULL_TREE || !loop_vinfo)
3625 compute_in_loop = true;
3626 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3630 /* 2. Determine where to generate the extra vector load.
3632 For the optimized realignment scheme, instead of generating two vector
3633 loads in each iteration, we generate a single extra vector load in the
3634 preheader of the loop, and in each iteration reuse the result of the
3635 vector load from the previous iteration. In case the memory access is in
3636 an inner-loop nested inside LOOP, which is now being vectorized using
3637 outer-loop vectorization, we need to determine whether this initial vector
3638 load should be generated at the preheader of the inner-loop, or can be
3639 generated at the preheader of LOOP. If the memory access has no evolution
3640 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3641 to be generated inside LOOP (in the preheader of the inner-loop). */
3643 if (nested_in_vect_loop)
3645 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3646 bool invariant_in_outerloop =
3647 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3648 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3651 loop_for_initial_load = loop;
3653 *at_loop = loop_for_initial_load;
3655 if (loop_for_initial_load)
3656 pe = loop_preheader_edge (loop_for_initial_load);
3658 /* 3. For the case of the optimized realignment, create the first vector
3659 load at the loop preheader. */
3661 if (alignment_support_scheme == dr_explicit_realign_optimized)
3663 /* Create msq_init = *(floor(p1)) in the loop preheader */
3665 gcc_assert (!compute_in_loop);
3666 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3667 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3668 &init_addr, &inc, true, &inv_p);
3669 new_stmt = gimple_build_assign_with_ops
3670 (BIT_AND_EXPR, NULL_TREE, ptr,
3671 build_int_cst (TREE_TYPE (ptr),
3672 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
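/* E.g., for a 16-byte vector type the constant above is -16 (~0xf), so
   the BIT_AND_EXPR rounds PTR down to floor(p), the enclosing aligned
   address.  */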
3673 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
3674 gimple_assign_set_lhs (new_stmt, new_temp);
3675 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3676 gcc_assert (!new_bb);
3678 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
3679 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
3680 new_stmt = gimple_build_assign (vec_dest, data_ref);
3681 new_temp = make_ssa_name (vec_dest, new_stmt);
3682 gimple_assign_set_lhs (new_stmt, new_temp);
3683 mark_symbols_for_renaming (new_stmt);
3686 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3687 gcc_assert (!new_bb);
3690 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3692 msq_init = gimple_assign_lhs (new_stmt);
3695 /* 4. Create realignment token using a target builtin, if available.
3696 It is done either inside the containing loop, or before LOOP (as
3697 determined above). */
3699 if (targetm.vectorize.builtin_mask_for_load)
3703 /* Compute INIT_ADDR - the initial address accessed by this memref.  */
3706 /* Generate the INIT_ADDR computation outside LOOP. */
3707 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3711 pe = loop_preheader_edge (loop);
3712 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3713 gcc_assert (!new_bb);
3716 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
3719 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3720 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3722 vect_create_destination_var (scalar_dest,
3723 gimple_call_return_type (new_stmt));
3724 new_temp = make_ssa_name (vec_dest, new_stmt);
3725 gimple_call_set_lhs (new_stmt, new_temp);
3727 if (compute_in_loop)
3728 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3731 /* Generate the misalignment computation outside LOOP. */
3732 pe = loop_preheader_edge (loop);
3733 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3734 gcc_assert (!new_bb);
3737 *realignment_token = gimple_call_lhs (new_stmt);
3739 /* The result of the CALL_EXPR to this builtin is determined from
3740 the value of the parameter and no global variables are touched,
3741 which makes the builtin a "const" function. Requiring the
3742 builtin to have the "const" attribute makes it unnecessary
3743 to call mark_call_clobbered. */
3744 gcc_assert (TREE_READONLY (builtin_decl));
3747 if (alignment_support_scheme == dr_explicit_realign)
3750 gcc_assert (!compute_in_loop);
3751 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3754 /* 5. Create msq = phi <msq_init, lsq> in loop */
3756 pe = loop_preheader_edge (containing_loop);
3757 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3758 msq = make_ssa_name (vec_dest, NULL);
3759 phi_stmt = create_phi_node (msq, containing_loop->header);
3760 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3761 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3767 /* Function vect_strided_load_supported.
3769 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3770 and FALSE otherwise. */
3773 vect_strided_load_supported (tree vectype)
3775 optab perm_even_optab, perm_odd_optab;
3776 enum machine_mode mode;
3778 mode = TYPE_MODE (vectype);
3780 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3782 if (!perm_even_optab)
3784 if (vect_print_dump_info (REPORT_DETAILS))
3785 fprintf (vect_dump, "no optab for perm_even.");
3789 if (optab_handler (perm_even_optab, mode) == CODE_FOR_nothing)
3791 if (vect_print_dump_info (REPORT_DETAILS))
3792 fprintf (vect_dump, "perm_even op not supported by target.");
3796 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3798 if (!perm_odd_optab)
3800 if (vect_print_dump_info (REPORT_DETAILS))
3801 fprintf (vect_dump, "no optab for perm_odd.");
3805 if (optab_handler (perm_odd_optab, mode) == CODE_FOR_nothing)
3807 if (vect_print_dump_info (REPORT_DETAILS))
3808 fprintf (vect_dump, "perm_odd op not supported by target.");
3815 /* Function vect_permute_load_chain.
3817 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3818 a power of 2, generate extract_even/odd stmts to reorder the input data
3819 correctly. Return the final references for loads in RESULT_CHAIN.
3821 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3822 The input is 4 vectors each containing 8 elements. We assign a number to each
3823 element, the input sequence is:
3825 1st vec: 0 1 2 3 4 5 6 7
3826 2nd vec: 8 9 10 11 12 13 14 15
3827 3rd vec: 16 17 18 19 20 21 22 23
3828 4th vec: 24 25 26 27 28 29 30 31
3830 The output sequence should be:
3832 1st vec: 0 4 8 12 16 20 24 28
3833 2nd vec: 1 5 9 13 17 21 25 29
3834 3rd vec: 2 6 10 14 18 22 26 30
3835 4th vec: 3 7 11 15 19 23 27 31
3837 i.e., the first output vector should contain the first elements of each
3838 interleaving group, etc.
3840 We use extract_even/odd instructions to create such output. The input of
3841 each extract_even/odd operation is two vectors:
3842 1st vec    2nd vec
3843 0 1 2 3    4 5 6 7
3845 and the output is the vector of extracted even/odd elements. The output of
3846 extract_even will be: 0 2 4 6
3847 and of extract_odd: 1 3 5 7
3850 The permutation is done in log LENGTH stages. In each stage extract_even
3851 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
3852 their order. In our example,
3854 E1: extract_even (1st vec, 2nd vec)
3855 E2: extract_odd (1st vec, 2nd vec)
3856 E3: extract_even (3rd vec, 4th vec)
3857 E4: extract_odd (3rd vec, 4th vec)
3859 The output for the first stage will be:
3861 E1: 0 2 4 6 8 10 12 14
3862 E2: 1 3 5 7 9 11 13 15
3863 E3: 16 18 20 22 24 26 28 30
3864 E4: 17 19 21 23 25 27 29 31
3866 In order to proceed and create the correct sequence for the next stage (or
3867 for the correct output, if the second stage is the last one, as in our
3868 example), we first put the output of the extract_even operation and then the
3869 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3870 The input for the second stage is:
3872 1st vec (E1): 0 2 4 6 8 10 12 14
3873 2nd vec (E3): 16 18 20 22 24 26 28 30
3874 3rd vec (E2): 1 3 5 7 9 11 13 15
3875 4th vec (E4): 17 19 21 23 25 27 29 31
3877 The output of the second stage:
3879 E1: 0 4 8 12 16 20 24 28
3880 E2: 2 6 10 14 18 22 26 30
3881 E3: 1 5 9 13 17 21 25 29
3882 E4: 3 7 11 15 19 23 27 31
3884 And RESULT_CHAIN after reordering:
3886 1st vec (E1): 0 4 8 12 16 20 24 28
3887 2nd vec (E3): 1 5 9 13 17 21 25 29
3888 3rd vec (E2): 2 6 10 14 18 22 26 30
3889 4th vec (E4): 3 7 11 15 19 23 27 31. */
static bool
vect_permute_load_chain (VEC(tree,heap) *dr_chain,
                         unsigned int length,
                         gimple stmt,
                         gimple_stmt_iterator *gsi,
                         VEC(tree,heap) **result_chain)
{
  tree perm_dest, data_ref, first_vect, second_vect;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  int i;
  unsigned int j;

  /* Check that the operation is supported.  */
  if (!vect_strided_load_supported (vectype))
    return false;

  *result_chain = VEC_copy (tree, heap, dr_chain);
  for (i = 0; i < exact_log2 (length); i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = VEC_index (tree, dr_chain, j);
          second_vect = VEC_index (tree, dr_chain, j+1);

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_even");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);

          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          /* The even results form the first half of the next stage's
             input.  */
          VEC_replace (tree, *result_chain, j/2, data_ref);

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          perm_dest = create_tmp_var (vectype, "vect_perm_odd");
          DECL_GIMPLE_REG_P (perm_dest) = 1;
          add_referenced_var (perm_dest);

          perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
                                                    perm_dest, first_vect,
                                                    second_vect);
          data_ref = make_ssa_name (perm_dest, perm_stmt);
          gimple_assign_set_lhs (perm_stmt, data_ref);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          mark_symbols_for_renaming (perm_stmt);

          /* The odd results form the second half.  */
          VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
        }
      dr_chain = VEC_copy (tree, heap, *result_chain);
    }

  return true;
}
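
/* For illustration only (not part of the vectorizer): a scalar model of
   the log2(LENGTH)-stage permutation above, operating on plain arrays
   instead of vector SSA names.  It is written for the fixed shape used in
   the walk-through before vect_permute_load_chain (LENGTH == 4 vectors of
   NELTS == 8 ints) and reproduces that example's RESULT_CHAIN.  The
   function name is hypothetical.  */
#if 0
#include <string.h>

static void
permute_load_chain_model (int chain[4][8], unsigned int length,
                          unsigned int nelts)
{
  int result[4][8];
  unsigned int stage, j, k;

  /* log2 (LENGTH) stages; LENGTH must be a power of 2.  */
  for (stage = 1; stage < length; stage *= 2)
    {
      for (j = 0; j < length; j += 2)
        {
          const int *first = chain[j], *second = chain[j + 1];
          for (k = 0; k < nelts; k++)
            {
              /* extract_even: element 2k of FIRST|SECOND goes into the
                 first half of the next stage's input.  */
              result[j / 2][k]
                = 2 * k < nelts ? first[2 * k] : second[2 * k - nelts];
              /* extract_odd: element 2k+1 goes into the second half.  */
              result[j / 2 + length / 2][k]
                = 2 * k + 1 < nelts ? first[2 * k + 1]
                                    : second[2 * k + 1 - nelts];
            }
        }
      /* Corresponds to dr_chain = VEC_copy (..., *result_chain).  */
      memcpy (chain, result, sizeof (result));
    }
}
#endif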
/* Function vect_transform_strided_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the result vectorized statements to
   the scalar statements.  */

bool
vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
                             gimple_stmt_iterator *gsi)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
  gimple next_stmt, new_stmt;
  VEC(tree,heap) *result_chain = NULL;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains
     permuted vectors that are ready for vector computation.  */
  result_chain = VEC_alloc (tree, heap, size);

  /* Permute.  */
  if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
    return false;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by the
         dead code elimination pass later.  No need to check for the first
         stmt in the group, since it always exists.
         DR_GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (next_stmt != first_stmt
          && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
             copies, and we put the new vector statement in the first available
             RELATED_STMT.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            {
              /* Walk to the end of the RELATED_STMT chain and append the
                 new vector statement there.  */
              gimple prev_stmt =
                STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
              gimple rel_stmt =
                STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
              while (rel_stmt)
                {
                  prev_stmt = rel_stmt;
                  rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                }
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
            }

          next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }

  VEC_free (tree, heap, result_chain);
  return true;
}
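
/* For illustration only (not part of the vectorizer): a source loop whose
   loads form an interleaving chain of size 4 with a gap.  Element b[4*i+2]
   is never read, so the walk above skips the permuted data-ref created for
   it (via gap_count), and the load generated for the gap is removed by DCE
   later.  Names are hypothetical.  */
#if 0
void
strided_load_example (int *a, int *b, int n)
{
  int i;
  for (i = 0; i < n; i++)
    a[i] = b[4 * i] + b[4 * i + 1] + b[4 * i + 3];
}
#endif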
/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to an ALIGNMENT-bit
   boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  if (DECL_EXTERNAL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}
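
/* For illustration only: decls for which the answer above differs.
   Forcing a larger alignment can succeed for the file-scope and automatic
   arrays (subject to MAX_OFILE_ALIGNMENT resp. MAX_STACK_ALIGNMENT), but
   never for the extern one, whose definition is emitted elsewhere.  Names
   are hypothetical.  */
#if 0
extern int ext_buf[64];     /* DECL_EXTERNAL: alignment cannot be forced.  */
static int file_buf[64];    /* TREE_STATIC: up to MAX_OFILE_ALIGNMENT.  */

void
alignment_example (void)
{
  int stack_buf[64];        /* Automatic: up to MAX_STACK_ALIGNMENT.  */
  (void) stack_buf;
}
#endif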
/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
                               bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */
  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout
     the execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same
     throughout the execution of the inner-loop.  This is because the
     inner-loop advances with the original scalar step (and not in steps of
     VS).  If the inner-loop step happens to be a multiple of VS, then the
     misalignment remains fixed and we can use the optimized realignment
     scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
        for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
      }  */
  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if ((nested_in_vect_loop
               && (TREE_INT_CST_LOW (DR_STEP (dr))
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
              || !loop_vinfo)
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
        {
          tree ba = DR_BASE_OBJECT (dr);

          if (ba)
            is_packed = contains_packed_reference (ba);
        }

      if (targetm.vectorize.
          support_vector_misalignment (mode, type,
                                       DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
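
/* For illustration only (not part of the vectorizer): a scalar model of
   the REALIGN_LOAD operation used by the explicit realignment schemes
   above.  Given two consecutive aligned vectors V1 and V2 of NELTS
   elements, and the realignment token modeled here simply as the element
   offset OFS of the unaligned address within an aligned vector, it
   produces the unaligned window spanning the two.  On real targets the
   token is a permutation mask computed once in the preheader (optimized
   scheme) or per access (unoptimized scheme).  Names are hypothetical.  */
#if 0
static void
realign_load_model (const int *v1, const int *v2, unsigned int ofs,
                    unsigned int nelts, int *out)
{
  unsigned int i;
  /* OFS is the misalignment in elements, 0 <= OFS < NELTS.  */
  for (i = 0; i < nelts; i++)
    out[i] = ofs + i < nelts ? v1[ofs + i] : v2[ofs + i - nelts];
}
#endif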