1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
25 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different than the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation - we'd miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
78 scalar_type = rhs_type;
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
93 gimple next_stmt = first_stmt;
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
99 while (next_stmt && next_stmt != stmt)
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
112 /* Function vect_insert_into_interleaving_chain.
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
147 /* Function vect_update_interleaving_chain.
149 For two data-refs DRA and DRB that are a part of a chain interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
163 NEXT(FIRST_DR) = previous FIRST_DR
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
171 vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
179 gimple node, prev, next, first_stmt;
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning. */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning. */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
290 /* Function vect_equal_offsets.
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
295 vect_equal_offsets (tree offset1, tree offset2)
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
302 if (offset1 == offset2)
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || !BINARY_CLASS_P (offset1)
307 || !BINARY_CLASS_P (offset2))
310 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
311 TREE_OPERAND (offset2, 0));
312 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
313 TREE_OPERAND (offset2, 1));
315 return (res0 && res1);
319 /* Function vect_check_interleaving.
321 Check if DRA and DRB are a part of interleaving. In case they are, insert
322 DRA and DRB in an interleaving chain. */
325 vect_check_interleaving (struct data_reference *dra,
326 struct data_reference *drb)
328 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
330 /* Check that the data-refs have same first location (except init) and they
331 are both either store or load (not load and store). */
332 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
333 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
334 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
335 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
336 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
337 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
338 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
339 || DR_IS_READ (dra) != DR_IS_READ (drb))
343 1. data-refs are of the same type
344 2. their steps are equal
345 3. the step (if greater than zero) is greater than the difference between
347 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
348 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
350 if (type_size_a != type_size_b
351 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
352 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
353 TREE_TYPE (DR_REF (drb))))
356 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
357 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
358 step = TREE_INT_CST_LOW (DR_STEP (dra));
362 /* If init_a == init_b + the size of the type * k, we have an interleaving,
363 and DRB is accessed before DRA. */
364 diff_mod_size = (init_a - init_b) % type_size_a;
366 if (step && (init_a - init_b) > step)
369 if (diff_mod_size == 0)
371 vect_update_interleaving_chain (drb, dra);
372 if (vect_print_dump_info (REPORT_DR_DETAILS))
374 fprintf (vect_dump, "Detected interleaving ");
375 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
376 fprintf (vect_dump, " and ");
377 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
384 /* If init_b == init_a + the size of the type * k, we have an
385 interleaving, and DRA is accessed before DRB. */
386 diff_mod_size = (init_b - init_a) % type_size_a;
388 if (step && (init_b - init_a) > step)
391 if (diff_mod_size == 0)
393 vect_update_interleaving_chain (dra, drb);
394 if (vect_print_dump_info (REPORT_DR_DETAILS))
396 fprintf (vect_dump, "Detected interleaving ");
397 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
398 fprintf (vect_dump, " and ");
399 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
408 /* Check if data references pointed by DR_I and DR_J are same or
409 belong to same interleaving group. Return FALSE if drs are
410 different, otherwise return TRUE. */
413 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
415 gimple stmt_i = DR_STMT (dr_i);
416 gimple stmt_j = DR_STMT (dr_j);
418 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
419 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
420 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
421 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
422 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
428 /* If address ranges represented by DDR_I and DDR_J are equal,
429 return TRUE, otherwise return FALSE. */
432 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
434 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
435 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
436 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
437 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
443 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
444 tested at run-time. Return TRUE if DDR was successfully inserted.
445 Return false if versioning is not supported. */
448 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
450 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
452 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
455 if (vect_print_dump_info (REPORT_DR_DETAILS))
457 fprintf (vect_dump, "mark for run-time aliasing test between ");
458 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
459 fprintf (vect_dump, " and ");
460 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
463 if (optimize_loop_nest_for_size_p (loop))
465 if (vect_print_dump_info (REPORT_DR_DETAILS))
466 fprintf (vect_dump, "versioning not supported when optimizing for size.");
470 /* FORNOW: We don't support versioning with outer-loop vectorization. */
473 if (vect_print_dump_info (REPORT_DR_DETAILS))
474 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
478 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
483 /* Function vect_analyze_data_ref_dependence.
485 Return TRUE if there (might) exist a dependence between a memory-reference
486 DRA and a memory-reference DRB. When versioning for alias may check a
487 dependence at run-time, return FALSE. */
490 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
491 loop_vec_info loop_vinfo)
494 struct loop *loop = NULL;
495 int vectorization_factor = 0;
496 struct data_reference *dra = DDR_A (ddr);
497 struct data_reference *drb = DDR_B (ddr);
498 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
499 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
500 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
501 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
502 lambda_vector dist_v;
503 unsigned int loop_depth;
505 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
507 /* Independent data accesses. */
508 vect_check_interleaving (dra, drb);
514 loop = LOOP_VINFO_LOOP (loop_vinfo);
515 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
518 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
521 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
525 if (vect_print_dump_info (REPORT_DR_DETAILS))
527 fprintf (vect_dump, "versioning for alias required: "
528 "can't determine dependence between ");
529 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
530 fprintf (vect_dump, " and ");
531 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
534 /* Add to list of ddrs that need to be tested at run-time. */
535 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
538 /* When vectorizing a basic block unknown depnedence can still mean
540 if (vect_check_interleaving (dra, drb))
543 if (vect_print_dump_info (REPORT_DR_DETAILS))
545 fprintf (vect_dump, "can't determine dependence between ");
546 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
547 fprintf (vect_dump, " and ");
548 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
554 /* Versioning for alias is not yet supported for basic block SLP, and
555 dependence distance is unapplicable, hence, in case of known data
556 dependence, basic block vectorization is impossible for now. */
559 if (dra != drb && vect_check_interleaving (dra, drb))
562 if (vect_print_dump_info (REPORT_DR_DETAILS))
564 fprintf (vect_dump, "determined dependence between ");
565 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
566 fprintf (vect_dump, " and ");
567 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
573 /* Loop-based vectorization and known data dependence. */
574 if (DDR_NUM_DIST_VECTS (ddr) == 0)
576 if (vect_print_dump_info (REPORT_DR_DETAILS))
578 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
579 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
580 fprintf (vect_dump, " and ");
581 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
583 /* Add to list of ddrs that need to be tested at run-time. */
584 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
587 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
588 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
590 int dist = dist_v[loop_depth];
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
593 fprintf (vect_dump, "dependence distance = %d.", dist);
595 /* Same loop iteration. */
596 if (dist % vectorization_factor == 0 && dra_size == drb_size)
598 /* Two references with distance zero have the same alignment. */
599 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
600 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
601 if (vect_print_dump_info (REPORT_ALIGNMENT))
602 fprintf (vect_dump, "accesses have the same alignment.");
603 if (vect_print_dump_info (REPORT_DR_DETAILS))
605 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
606 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
607 fprintf (vect_dump, " and ");
608 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
611 /* For interleaving, mark that there is a read-write dependency if
612 necessary. We check before that one of the data-refs is store. */
613 if (DR_IS_READ (dra))
614 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
617 if (DR_IS_READ (drb))
618 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
624 if (abs (dist) >= vectorization_factor
625 || (dist > 0 && DDR_REVERSED_P (ddr)))
627 /* Dependence distance does not create dependence, as far as
628 vectorization is concerned, in this case. If DDR_REVERSED_P the
629 order of the data-refs in DDR was reversed (to make distance
630 vector positive), and the actual distance is negative. */
631 if (vect_print_dump_info (REPORT_DR_DETAILS))
632 fprintf (vect_dump, "dependence distance >= VF or negative.");
636 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
638 fprintf (vect_dump, "not vectorized, possible dependence "
639 "between data-refs ");
640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
651 /* Function vect_analyze_data_ref_dependences.
653 Examine all the data references in the loop, and make sure there do not
654 exist any data dependences between them. */
657 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
658 bb_vec_info bb_vinfo)
661 VEC (ddr_p, heap) *ddrs = NULL;
662 struct data_dependence_relation *ddr;
664 if (vect_print_dump_info (REPORT_DETAILS))
665 fprintf (vect_dump, "=== vect_analyze_dependences ===");
668 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
670 ddrs = BB_VINFO_DDRS (bb_vinfo);
672 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
673 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
680 /* Function vect_compute_data_ref_alignment
682 Compute the misalignment of the data reference DR.
685 1. If during the misalignment computation it is found that the data reference
686 cannot be vectorized then false is returned.
687 2. DR_MISALIGNMENT (DR) is defined.
689 FOR NOW: No analysis is actually performed. Misalignment is calculated
690 only for trivial cases. TODO. */
693 vect_compute_data_ref_alignment (struct data_reference *dr)
695 gimple stmt = DR_STMT (dr);
696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
698 struct loop *loop = NULL;
699 tree ref = DR_REF (dr);
701 tree base, base_addr;
704 tree aligned_to, alignment;
706 if (vect_print_dump_info (REPORT_DETAILS))
707 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
710 loop = LOOP_VINFO_LOOP (loop_vinfo);
712 /* Initialize misalignment to unknown. */
713 SET_DR_MISALIGNMENT (dr, -1);
715 misalign = DR_INIT (dr);
716 aligned_to = DR_ALIGNED_TO (dr);
717 base_addr = DR_BASE_ADDRESS (dr);
718 vectype = STMT_VINFO_VECTYPE (stmt_info);
720 /* In case the dataref is in an inner-loop of the loop that is being
721 vectorized (LOOP), we use the base and misalignment information
722 relative to the outer-loop (LOOP). This is ok only if the misalignment
723 stays the same throughout the execution of the inner-loop, which is why
724 we have to check that the stride of the dataref in the inner-loop evenly
725 divides by the vector size. */
726 if (loop && nested_in_vect_loop_p (loop, stmt))
728 tree step = DR_STEP (dr);
729 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
731 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
733 if (vect_print_dump_info (REPORT_ALIGNMENT))
734 fprintf (vect_dump, "inner step divides the vector-size.");
735 misalign = STMT_VINFO_DR_INIT (stmt_info);
736 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
737 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
741 if (vect_print_dump_info (REPORT_ALIGNMENT))
742 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
743 misalign = NULL_TREE;
747 base = build_fold_indirect_ref (base_addr);
748 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
750 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
753 if (vect_print_dump_info (REPORT_ALIGNMENT))
755 fprintf (vect_dump, "Unknown alignment for access: ");
756 print_generic_expr (vect_dump, base, TDF_SLIM);
762 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
764 || (TREE_CODE (base_addr) == SSA_NAME
765 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
766 TREE_TYPE (base_addr)))),
770 base_aligned = false;
774 /* Do not change the alignment of global variables if
775 flag_section_anchors is enabled. */
776 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
777 || (TREE_STATIC (base) && flag_section_anchors))
779 if (vect_print_dump_info (REPORT_DETAILS))
781 fprintf (vect_dump, "can't force alignment of ref: ");
782 print_generic_expr (vect_dump, ref, TDF_SLIM);
787 /* Force the alignment of the decl.
788 NOTE: This is the only change to the code we make during
789 the analysis phase, before deciding to vectorize the loop. */
790 if (vect_print_dump_info (REPORT_DETAILS))
791 fprintf (vect_dump, "force alignment");
792 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
793 DECL_USER_ALIGN (base) = 1;
796 /* At this point we assume that the base is aligned. */
797 gcc_assert (base_aligned
798 || (TREE_CODE (base) == VAR_DECL
799 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
801 /* Modulo alignment. */
802 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
804 if (!host_integerp (misalign, 1))
806 /* Negative or overflowed misalignment value. */
807 if (vect_print_dump_info (REPORT_DETAILS))
808 fprintf (vect_dump, "unexpected misalign value");
812 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
814 if (vect_print_dump_info (REPORT_DETAILS))
816 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
817 print_generic_expr (vect_dump, ref, TDF_SLIM);
824 /* Function vect_compute_data_refs_alignment
826 Compute the misalignment of data references in the loop.
827 Return FALSE if a data reference is found that cannot be vectorized. */
830 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
831 bb_vec_info bb_vinfo)
833 VEC (data_reference_p, heap) *datarefs;
834 struct data_reference *dr;
838 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
840 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
842 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
843 if (!vect_compute_data_ref_alignment (dr))
850 /* Function vect_update_misalignment_for_peel
852 DR - the data reference whose misalignment is to be adjusted.
853 DR_PEEL - the data reference whose misalignment is being made
854 zero in the vector loop by the peel.
855 NPEEL - the number of iterations in the peel loop if the misalignment
856 of DR_PEEL is known at compile time. */
859 vect_update_misalignment_for_peel (struct data_reference *dr,
860 struct data_reference *dr_peel, int npeel)
863 VEC(dr_p,heap) *same_align_drs;
864 struct data_reference *current_dr;
865 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
866 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
867 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
868 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
870 /* For interleaved data accesses the step in the loop must be multiplied by
871 the size of the interleaving group. */
872 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
873 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
874 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
875 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
877 /* It can be assumed that the data refs with the same alignment as dr_peel
878 are aligned in the vector loop. */
880 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
881 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
883 if (current_dr != dr)
885 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
886 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
887 SET_DR_MISALIGNMENT (dr, 0);
891 if (known_alignment_for_access_p (dr)
892 && known_alignment_for_access_p (dr_peel))
894 int misal = DR_MISALIGNMENT (dr);
895 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
896 misal += npeel * dr_size;
897 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
898 SET_DR_MISALIGNMENT (dr, misal);
902 if (vect_print_dump_info (REPORT_DETAILS))
903 fprintf (vect_dump, "Setting misalignment to -1.");
904 SET_DR_MISALIGNMENT (dr, -1);
908 /* Function vect_verify_datarefs_alignment
910 Return TRUE if all data references in the loop can be
911 handled with respect to alignment. */
914 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
916 VEC (data_reference_p, heap) *datarefs;
917 struct data_reference *dr;
918 enum dr_alignment_support supportable_dr_alignment;
922 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
924 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
926 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
928 gimple stmt = DR_STMT (dr);
929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
931 /* For interleaving, only the alignment of the first access matters. */
932 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
933 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
936 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
937 if (!supportable_dr_alignment)
939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
943 "not vectorized: unsupported unaligned load.");
946 "not vectorized: unsupported unaligned store.");
950 if (supportable_dr_alignment != dr_aligned
951 && vect_print_dump_info (REPORT_ALIGNMENT))
952 fprintf (vect_dump, "Vectorizing an unaligned access.");
958 /* Function vector_alignment_reachable_p
960 Return true if vector alignment for DR is reachable by peeling
961 a few loop iterations. Return false otherwise. */
964 vector_alignment_reachable_p (struct data_reference *dr)
966 gimple stmt = DR_STMT (dr);
967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
970 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
972 /* For interleaved access we peel only if number of iterations in
973 the prolog loop ({VF - misalignment}), is a multiple of the
974 number of the interleaved accesses. */
975 int elem_size, mis_in_elements;
976 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
978 /* FORNOW: handle only known alignment. */
979 if (!known_alignment_for_access_p (dr))
982 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
983 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
985 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
989 /* If misalignment is known at the compile time then allow peeling
990 only if natural alignment is reachable through peeling. */
991 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
993 HOST_WIDE_INT elmsize =
994 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
995 if (vect_print_dump_info (REPORT_DETAILS))
997 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
998 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1000 if (DR_MISALIGNMENT (dr) % elmsize)
1002 if (vect_print_dump_info (REPORT_DETAILS))
1003 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1008 if (!known_alignment_for_access_p (dr))
1010 tree type = (TREE_TYPE (DR_REF (dr)));
1011 tree ba = DR_BASE_OBJECT (dr);
1012 bool is_packed = false;
1015 is_packed = contains_packed_reference (ba);
1017 if (vect_print_dump_info (REPORT_DETAILS))
1018 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1019 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1028 /* Function vect_enhance_data_refs_alignment
1030 This pass will use loop versioning and loop peeling in order to enhance
1031 the alignment of data references in the loop.
1033 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1034 original loop is to be vectorized; Any other loops that are created by
1035 the transformations performed in this pass - are not supposed to be
1036 vectorized. This restriction will be relaxed.
1038 This pass will require a cost model to guide it whether to apply peeling
1039 or versioning or a combination of the two. For example, the scheme that
1040 intel uses when given a loop with several memory accesses, is as follows:
1041 choose one memory access ('p') which alignment you want to force by doing
1042 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1043 other accesses are not necessarily aligned, or (2) use loop versioning to
1044 generate one loop in which all accesses are aligned, and another loop in
1045 which only 'p' is necessarily aligned.
1047 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1048 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1049 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1051 Devising a cost model is the most critical aspect of this work. It will
1052 guide us on which access to peel for, whether to use loop versioning, how
1053 many versions to create, etc. The cost model will probably consist of
1054 generic considerations as well as target specific considerations (on
1055 powerpc for example, misaligned stores are more painful than misaligned
1058 Here are the general steps involved in alignment enhancements:
1060 -- original loop, before alignment analysis:
1061 for (i=0; i<N; i++){
1062 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1063 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1066 -- After vect_compute_data_refs_alignment:
1067 for (i=0; i<N; i++){
1068 x = q[i]; # DR_MISALIGNMENT(q) = 3
1069 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1072 -- Possibility 1: we do loop versioning:
1074 for (i=0; i<N; i++){ # loop 1A
1075 x = q[i]; # DR_MISALIGNMENT(q) = 3
1076 p[i] = y; # DR_MISALIGNMENT(p) = 0
1080 for (i=0; i<N; i++){ # loop 1B
1081 x = q[i]; # DR_MISALIGNMENT(q) = 3
1082 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1086 -- Possibility 2: we do loop peeling:
1087 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1091 for (i = 3; i < N; i++){ # loop 2A
1092 x = q[i]; # DR_MISALIGNMENT(q) = 0
1093 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1096 -- Possibility 3: combination of loop peeling and versioning:
1097 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1102 for (i = 3; i<N; i++){ # loop 3A
1103 x = q[i]; # DR_MISALIGNMENT(q) = 0
1104 p[i] = y; # DR_MISALIGNMENT(p) = 0
1108 for (i = 3; i<N; i++){ # loop 3B
1109 x = q[i]; # DR_MISALIGNMENT(q) = 0
1110 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1114 These loops are later passed to loop_transform to be vectorized. The
1115 vectorizer will use the alignment information to guide the transformation
1116 (whether to generate regular loads/stores, or with special handling for
1120 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1122 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1124 enum dr_alignment_support supportable_dr_alignment;
1125 struct data_reference *dr0 = NULL;
1126 struct data_reference *dr;
1128 bool do_peeling = false;
1129 bool do_versioning = false;
1132 stmt_vec_info stmt_info;
1133 int vect_versioning_for_alias_required;
1135 if (vect_print_dump_info (REPORT_DETAILS))
1136 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1138 /* While cost model enhancements are expected in the future, the high level
1139 view of the code at this time is as follows:
1141 A) If there is an unsupported misaligned access then see if peeling
1142 to align this access can make all data references satisfy
1143 vect_supportable_dr_alignment. If so, update data structures
1144 as needed and return true.
1146 B) If peeling wasn't possible and there is a data reference with an
1147 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1148 then see if loop versioning checks can be used to make all data
1149 references satisfy vect_supportable_dr_alignment. If so, update
1150 data structures as needed and return true.
1152 C) If neither peeling nor versioning were successful then return false if
1153 any data reference does not satisfy vect_supportable_dr_alignment.
1155 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1157 Note, Possibility 3 above (which is peeling and versioning together) is not
1158 being done at this time. */
1160 /* (1) Peeling to force alignment. */
1162 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1164 + How many accesses will become aligned due to the peeling
1165 - How many accesses will become unaligned due to the peeling,
1166 and the cost of misaligned accesses.
1167 - The cost of peeling (the extra runtime checks, the increase
1170 The scheme we use FORNOW: peel to force the alignment of the first
1171 unsupported misaligned access in the loop.
1173 TODO: Use a cost model. */
1175 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1177 stmt = DR_STMT (dr);
1178 stmt_info = vinfo_for_stmt (stmt);
1179 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1181 /* For interleaving, only the alignment of the first access
1183 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1184 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1187 if (!supportable_dr_alignment)
1189 do_peeling = vector_alignment_reachable_p (dr);
1192 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1193 fprintf (vect_dump, "vector alignment may not be reachable");
1198 vect_versioning_for_alias_required
1199 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1201 /* Temporarily, if versioning for alias is required, we disable peeling
1202 until we support peeling and versioning. Often peeling for alignment
1203 will require peeling for loop-bound, which in turn requires that we
1204 know how to adjust the loop ivs after the loop. */
1205 if (vect_versioning_for_alias_required
1206 || !vect_can_advance_ivs_p (loop_vinfo)
1207 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1214 gimple stmt = DR_STMT (dr0);
1215 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1216 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1217 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1219 if (known_alignment_for_access_p (dr0))
1221 /* Since it's known at compile time, compute the number of iterations
1222 in the peeled loop (the peeling factor) for use in updating
1223 DR_MISALIGNMENT values. The peeling factor is the vectorization
1224 factor minus the misalignment as an element count. */
1225 mis = DR_MISALIGNMENT (dr0);
1226 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1227 npeel = nelements - mis;
1229 /* For interleaved data access every iteration accesses all the
1230 members of the group, therefore we divide the number of iterations
1231 by the group size. */
1232 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1233 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1234 npeel /= DR_GROUP_SIZE (stmt_info);
1236 if (vect_print_dump_info (REPORT_DETAILS))
1237 fprintf (vect_dump, "Try peeling by %d", npeel);
1240 /* Ensure that all data refs can be vectorized after the peel. */
1241 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1243 int save_misalignment;
1248 stmt = DR_STMT (dr);
1249 stmt_info = vinfo_for_stmt (stmt);
1250 /* For interleaving, only the alignment of the first access
1252 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1253 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1256 save_misalignment = DR_MISALIGNMENT (dr);
1257 vect_update_misalignment_for_peel (dr, dr0, npeel);
1258 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1259 SET_DR_MISALIGNMENT (dr, save_misalignment);
1261 if (!supportable_dr_alignment)
1270 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1271 If the misalignment of DR_i is identical to that of dr0 then set
1272 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1273 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1274 by the peeling factor times the element size of DR_i (MOD the
1275 vectorization factor times the size). Otherwise, the
1276 misalignment of DR_i must be set to unknown. */
1277 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1279 vect_update_misalignment_for_peel (dr, dr0, npeel);
1281 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1282 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1283 SET_DR_MISALIGNMENT (dr0, 0);
1284 if (vect_print_dump_info (REPORT_ALIGNMENT))
1285 fprintf (vect_dump, "Alignment of access forced using peeling.");
1287 if (vect_print_dump_info (REPORT_DETAILS))
1288 fprintf (vect_dump, "Peeling for alignment will be applied.");
1290 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1297 /* (2) Versioning to force alignment. */
1299 /* Try versioning if:
1300 1) flag_tree_vect_loop_version is TRUE
1301 2) optimize loop for speed
1302 3) there is at least one unsupported misaligned data ref with an unknown
1304 4) all misaligned data refs with a known misalignment are supported, and
1305 5) the number of runtime alignment checks is within reason. */
1308 flag_tree_vect_loop_version
1309 && optimize_loop_nest_for_speed_p (loop)
1310 && (!loop->inner); /* FORNOW */
1314 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1316 stmt = DR_STMT (dr);
1317 stmt_info = vinfo_for_stmt (stmt);
1319 /* For interleaving, only the alignment of the first access
1321 if (aligned_access_p (dr)
1322 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1323 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1326 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1328 if (!supportable_dr_alignment)
1334 if (known_alignment_for_access_p (dr)
1335 || VEC_length (gimple,
1336 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1337 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1339 do_versioning = false;
1343 stmt = DR_STMT (dr);
1344 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1345 gcc_assert (vectype);
1347 /* The rightmost bits of an aligned address must be zeros.
1348 Construct the mask needed for this test. For example,
1349 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1350 mask must be 15 = 0xf. */
1351 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1353 /* FORNOW: use the same mask to test all potentially unaligned
1354 references in the loop. The vectorizer currently supports
1355 a single vector size, see the reference to
1356 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1357 vectorization factor is computed. */
1358 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1359 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1360 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1361 VEC_safe_push (gimple, heap,
1362 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1367 /* Versioning requires at least one misaligned data reference. */
1368 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1369 do_versioning = false;
1370 else if (!do_versioning)
1371 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1376 VEC(gimple,heap) *may_misalign_stmts
1377 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1380 /* It can now be assumed that the data references in the statements
1381 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1382 of the loop being vectorized. */
1383 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1385 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1386 dr = STMT_VINFO_DATA_REF (stmt_info);
1387 SET_DR_MISALIGNMENT (dr, 0);
1388 if (vect_print_dump_info (REPORT_ALIGNMENT))
1389 fprintf (vect_dump, "Alignment of access forced using versioning.");
1392 if (vect_print_dump_info (REPORT_DETAILS))
1393 fprintf (vect_dump, "Versioning for alignment will be applied.");
1395 /* Peeling and versioning can't be done together at this time. */
1396 gcc_assert (! (do_peeling && do_versioning));
1398 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1403 /* This point is reached if neither peeling nor versioning is being done. */
1404 gcc_assert (! (do_peeling || do_versioning));
1406 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1411 /* Function vect_analyze_data_refs_alignment
1413 Analyze the alignment of the data-references in the loop.
1414 Return FALSE if a data reference is found that cannot be vectorized. */
1417 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1418 bb_vec_info bb_vinfo)
1420 if (vect_print_dump_info (REPORT_DETAILS))
1421 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1423 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1425 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1427 "not vectorized: can't calculate alignment for data ref.");
1435 /* Analyze groups of strided accesses: check that DR belongs to a group of
1436 strided accesses of legal size, step, etc. Detect gaps, single element
1437 interleaving, and other special cases. Set strided access info.
1438 Collect groups of strided stores for further use in SLP analysis. */
1441 vect_analyze_group_access (struct data_reference *dr)
1443 tree step = DR_STEP (dr);
1444 tree scalar_type = TREE_TYPE (DR_REF (dr));
1445 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1446 gimple stmt = DR_STMT (dr);
1447 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1448 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1449 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1450 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1451 HOST_WIDE_INT stride;
1452 bool slp_impossible = false;
1454 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1455 interleaving group (including gaps). */
1456 stride = dr_step / type_size;
1458 /* Not consecutive access is possible only if it is a part of interleaving. */
1459 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1461 /* Check if it this DR is a part of interleaving, and is a single
1462 element of the group that is accessed in the loop. */
1464 /* Gaps are supported only for loads. STEP must be a multiple of the type
1465 size. The size of the group must be a power of 2. */
1467 && (dr_step % type_size) == 0
1469 && exact_log2 (stride) != -1)
1471 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1472 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1473 if (vect_print_dump_info (REPORT_DR_DETAILS))
1475 fprintf (vect_dump, "Detected single element interleaving ");
1476 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1477 fprintf (vect_dump, " step ");
1478 print_generic_expr (vect_dump, step, TDF_SLIM);
1482 if (vect_print_dump_info (REPORT_DETAILS))
1483 fprintf (vect_dump, "not consecutive access");
1487 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1489 /* First stmt in the interleaving chain. Check the chain. */
1490 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1491 struct data_reference *data_ref = dr;
1492 unsigned int count = 1;
1494 tree prev_init = DR_INIT (data_ref);
1496 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1500 /* Skip same data-refs. In case that two or more stmts share data-ref
1501 (supported only for loads), we vectorize only the first stmt, and
1502 the rest get their vectorized loads from the first one. */
1503 if (!tree_int_cst_compare (DR_INIT (data_ref),
1504 DR_INIT (STMT_VINFO_DATA_REF (
1505 vinfo_for_stmt (next)))))
1507 if (!DR_IS_READ (data_ref))
1509 if (vect_print_dump_info (REPORT_DETAILS))
1510 fprintf (vect_dump, "Two store stmts share the same dr.");
1514 /* Check that there is no load-store dependencies for this loads
1515 to prevent a case of load-store-load to the same location. */
1516 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1517 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1519 if (vect_print_dump_info (REPORT_DETAILS))
1521 "READ_WRITE dependence in interleaving.");
1525 /* For load use the same data-ref load. */
1526 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1529 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1534 /* Check that all the accesses have the same STEP. */
1535 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1536 if (tree_int_cst_compare (step, next_step))
1538 if (vect_print_dump_info (REPORT_DETAILS))
1539 fprintf (vect_dump, "not consecutive access in interleaving");
1543 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1544 /* Check that the distance between two accesses is equal to the type
1545 size. Otherwise, we have gaps. */
1546 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1547 - TREE_INT_CST_LOW (prev_init)) / type_size;
1550 /* FORNOW: SLP of accesses with gaps is not supported. */
1551 slp_impossible = true;
1552 if (!DR_IS_READ (data_ref))
1554 if (vect_print_dump_info (REPORT_DETAILS))
1555 fprintf (vect_dump, "interleaved store with gaps");
1562 /* Store the gap from the previous member of the group. If there is no
1563 gap in the access, DR_GROUP_GAP is always 1. */
1564 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1566 prev_init = DR_INIT (data_ref);
1567 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1568 /* Count the number of data-refs in the chain. */
1572 /* COUNT is the number of accesses found, we multiply it by the size of
1573 the type to get COUNT_IN_BYTES. */
1574 count_in_bytes = type_size * count;
1576 /* Check that the size of the interleaving (including gaps) is not
1577 greater than STEP. */
1578 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1580 if (vect_print_dump_info (REPORT_DETAILS))
1582 fprintf (vect_dump, "interleaving size is greater than step for ");
1583 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1588 /* Check that the size of the interleaving is equal to STEP for stores,
1589 i.e., that there are no gaps. */
1590 if (dr_step && dr_step != count_in_bytes)
1592 if (DR_IS_READ (dr))
1594 slp_impossible = true;
1595 /* There is a gap after the last load in the group. This gap is a
1596 difference between the stride and the number of elements. When
1597 there is no gap, this difference should be 0. */
1598 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1602 if (vect_print_dump_info (REPORT_DETAILS))
1603 fprintf (vect_dump, "interleaved store with gaps");
1608 /* Check that STEP is a multiple of type size. */
1609 if (dr_step && (dr_step % type_size) != 0)
1611 if (vect_print_dump_info (REPORT_DETAILS))
1613 fprintf (vect_dump, "step is not a multiple of type size: step ");
1614 print_generic_expr (vect_dump, step, TDF_SLIM);
1615 fprintf (vect_dump, " size ");
1616 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1622 /* FORNOW: we handle only interleaving that is a power of 2.
1623 We don't fail here if it may be still possible to vectorize the
1624 group using SLP. If not, the size of the group will be checked in
1625 vect_analyze_operations, and the vectorization will fail. */
1626 if (exact_log2 (stride) == -1)
1628 if (vect_print_dump_info (REPORT_DETAILS))
1629 fprintf (vect_dump, "interleaving is not a power of 2");
1638 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1639 if (vect_print_dump_info (REPORT_DETAILS))
1640 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1642 /* SLP: create an SLP data structure for every interleaving group of
1643 stores for further analysis in vect_analyse_slp. */
1644 if (!DR_IS_READ (dr) && !slp_impossible)
1647 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1650 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1659 /* Analyze the access pattern of the data-reference DR.
1660 In case of non-consecutive accesses call vect_analyze_group_access() to
1661 analyze groups of strided accesses. */
1664 vect_analyze_data_ref_access (struct data_reference *dr)
1666 tree step = DR_STEP (dr);
1667 tree scalar_type = TREE_TYPE (DR_REF (dr));
1668 gimple stmt = DR_STMT (dr);
1669 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1670 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1671 struct loop *loop = NULL;
1672 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1675 loop = LOOP_VINFO_LOOP (loop_vinfo);
1677 if (loop_vinfo && !step)
1679 if (vect_print_dump_info (REPORT_DETAILS))
1680 fprintf (vect_dump, "bad data-ref access in loop");
1684 /* Don't allow invariant accesses in loops. */
1685 if (loop_vinfo && dr_step == 0)
1688 if (loop && nested_in_vect_loop_p (loop, stmt))
1690 /* Interleaved accesses are not yet supported within outer-loop
1691 vectorization for references in the inner-loop. */
1692 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1694 /* For the rest of the analysis we use the outer-loop step. */
1695 step = STMT_VINFO_DR_STEP (stmt_info);
1696 dr_step = TREE_INT_CST_LOW (step);
1700 if (vect_print_dump_info (REPORT_ALIGNMENT))
1701 fprintf (vect_dump, "zero step in outer loop.");
1702 if (DR_IS_READ (dr))
1710 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1712 /* Mark that it is not interleaving. */
1713 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1717 if (loop && nested_in_vect_loop_p (loop, stmt))
1719 if (vect_print_dump_info (REPORT_ALIGNMENT))
1720 fprintf (vect_dump, "strided access in outer loop.");
1724 /* Not consecutive access - check if it's a part of interleaving group. */
1725 return vect_analyze_group_access (dr);
1729 /* Function vect_analyze_data_ref_accesses.
1731 Analyze the access pattern of all the data references in the loop.
1733 FORNOW: the only access pattern that is considered vectorizable is a
1734 simple step 1 (consecutive) access.
1736 FORNOW: handle only arrays and pointer accesses. */
1739 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1742 VEC (data_reference_p, heap) *datarefs;
1743 struct data_reference *dr;
1745 if (vect_print_dump_info (REPORT_DETAILS))
1746 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1749 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1751 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1753 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1754 if (!vect_analyze_data_ref_access (dr))
1756 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1757 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1764 /* Function vect_prune_runtime_alias_test_list.
1766 Prune a list of ddrs to be tested at run-time by versioning for alias.
1767 Return FALSE if resulting list of ddrs is longer then allowed by
1768 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1771 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1773 VEC (ddr_p, heap) * ddrs =
1774 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1777 if (vect_print_dump_info (REPORT_DETAILS))
1778 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1780 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1785 ddr_i = VEC_index (ddr_p, ddrs, i);
1788 for (j = 0; j < i; j++)
1790 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1792 if (vect_vfa_range_equal (ddr_i, ddr_j))
1794 if (vect_print_dump_info (REPORT_DR_DETAILS))
1796 fprintf (vect_dump, "found equal ranges ");
1797 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1798 fprintf (vect_dump, ", ");
1799 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1800 fprintf (vect_dump, " and ");
1801 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1802 fprintf (vect_dump, ", ");
1803 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1812 VEC_ordered_remove (ddr_p, ddrs, i);
1818 if (VEC_length (ddr_p, ddrs) >
1819 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1821 if (vect_print_dump_info (REPORT_DR_DETAILS))
1824 "disable versioning for alias - max number of generated "
1825 "checks exceeded.");
1828 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1837 /* Function vect_analyze_data_refs.
1839 Find all the data references in the loop or basic block.
1841 The general structure of the analysis of data refs in the vectorizer is as
1843 1- vect_analyze_data_refs(loop/bb): call
1844 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1845 in the loop/bb and their dependences.
1846 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1847 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1848 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1853 vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1855 struct loop *loop = NULL;
1856 basic_block bb = NULL;
1858 VEC (data_reference_p, heap) *datarefs;
1859 struct data_reference *dr;
1862 if (vect_print_dump_info (REPORT_DETAILS))
1863 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1867 loop = LOOP_VINFO_LOOP (loop_vinfo);
1868 compute_data_dependences_for_loop (loop, true,
1869 &LOOP_VINFO_DATAREFS (loop_vinfo),
1870 &LOOP_VINFO_DDRS (loop_vinfo));
1871 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1875 bb = BB_VINFO_BB (bb_vinfo);
1876 compute_data_dependences_for_bb (bb, true,
1877 &BB_VINFO_DATAREFS (bb_vinfo),
1878 &BB_VINFO_DDRS (bb_vinfo));
1879 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1882 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1883 from stmt_vec_info struct to DR and vectype. */
1885 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1888 stmt_vec_info stmt_info;
1890 tree base, offset, init;
1892 if (!dr || !DR_REF (dr))
1894 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1895 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1899 stmt = DR_STMT (dr);
1900 stmt_info = vinfo_for_stmt (stmt);
1902 /* Check that analysis of the data-ref succeeded. */
1903 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1906 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1908 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1909 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1914 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1916 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1917 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1922 base = unshare_expr (DR_BASE_ADDRESS (dr));
1923 offset = unshare_expr (DR_OFFSET (dr));
1924 init = unshare_expr (DR_INIT (dr));
1926 /* Update DR field in stmt_vec_info struct. */
1927 bb = gimple_bb (stmt);
1929 /* If the dataref is in an inner-loop of the loop that is considered for
1930 for vectorization, we also want to analyze the access relative to
1931 the outer-loop (DR contains information only relative to the
1932 inner-most enclosing loop). We do that by building a reference to the
1933 first location accessed by the inner-loop, and analyze it relative to
1935 if (loop && nested_in_vect_loop_p (loop, stmt))
1937 tree outer_step, outer_base, outer_init;
1938 HOST_WIDE_INT pbitsize, pbitpos;
1940 enum machine_mode pmode;
1941 int punsignedp, pvolatilep;
1942 affine_iv base_iv, offset_iv;
1945 /* Build a reference to the first location accessed by the
1946 inner-loop: *(BASE+INIT). (The first location is actually
1947 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1948 tree inner_base = build_fold_indirect_ref
1949 (fold_build2 (POINTER_PLUS_EXPR,
1950 TREE_TYPE (base), base,
1951 fold_convert (sizetype, init)));
1953 if (vect_print_dump_info (REPORT_DETAILS))
1955 fprintf (vect_dump, "analyze in outer-loop: ");
1956 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1959 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1960 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1961 gcc_assert (outer_base != NULL_TREE);
1963 if (pbitpos % BITS_PER_UNIT != 0)
1965 if (vect_print_dump_info (REPORT_DETAILS))
1966 fprintf (vect_dump, "failed: bit offset alignment.\n");
1970 outer_base = build_fold_addr_expr (outer_base);
1971 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1974 if (vect_print_dump_info (REPORT_DETAILS))
1975 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1982 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
1990 offset_iv.base = ssize_int (0);
1991 offset_iv.step = ssize_int (0);
1993 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
1996 if (vect_print_dump_info (REPORT_DETAILS))
1997 fprintf (vect_dump, "evolution of offset is not affine.\n");
2001 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2002 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2003 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2004 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2005 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2007 outer_step = size_binop (PLUS_EXPR,
2008 fold_convert (ssizetype, base_iv.step),
2009 fold_convert (ssizetype, offset_iv.step));
2011 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2012 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2013 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2014 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2015 STMT_VINFO_DR_OFFSET (stmt_info) =
2016 fold_convert (ssizetype, offset_iv.base);
2017 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2018 size_int (highest_pow2_factor (offset_iv.base));
2020 if (vect_print_dump_info (REPORT_DETAILS))
2022 fprintf (vect_dump, "\touter base_address: ");
2023 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2024 fprintf (vect_dump, "\n\touter offset from base address: ");
2025 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2026 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2027 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2028 fprintf (vect_dump, "\n\touter step: ");
2029 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2030 fprintf (vect_dump, "\n\touter aligned to: ");
2031 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2035 if (STMT_VINFO_DATA_REF (stmt_info))
2037 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2040 "not vectorized: more than one data ref in stmt: ");
2041 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2046 STMT_VINFO_DATA_REF (stmt_info) = dr;
2048 /* Set vectype for STMT. */
2049 scalar_type = TREE_TYPE (DR_REF (dr));
2050 STMT_VINFO_VECTYPE (stmt_info) =
2051 get_vectype_for_scalar_type (scalar_type);
2052 if (!STMT_VINFO_VECTYPE (stmt_info))
2054 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2057 "not vectorized: no vectype for stmt: ");
2058 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2059 fprintf (vect_dump, " scalar_type: ");
2060 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2070 /* Function vect_get_new_vect_var.
2072 Returns a name for a new variable. The current naming scheme appends the
2073 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
2074 the name of vectorizer generated variables, and appends that to NAME if
2078 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2085 case vect_simple_var:
2088 case vect_scalar_var:
2091 case vect_pointer_var:
2100 char* tmp = concat (prefix, name, NULL);
2101 new_vect_var = create_tmp_var (type, tmp);
2105 new_vect_var = create_tmp_var (type, prefix);
2107 /* Mark vector typed variable as a gimple register variable. */
2108 if (TREE_CODE (type) == VECTOR_TYPE)
2109 DECL_GIMPLE_REG_P (new_vect_var) = true;
2111 return new_vect_var;
2115 /* Function vect_create_addr_base_for_vector_ref.
2117 Create an expression that computes the address of the first memory location
2118 that will be accessed for a data reference.
2121 STMT: The statement containing the data reference.
2122 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2123 OFFSET: Optional. If supplied, it is be added to the initial address.
2124 LOOP: Specify relative to which loop-nest should the address be computed.
2125 For example, when the dataref is in an inner-loop nested in an
2126 outer-loop that is now being vectorized, LOOP can be either the
2127 outer-loop, or the inner-loop. The first memory location accessed
2128 by the following dataref ('in' points to short):
2135 if LOOP=i_loop: &in (relative to i_loop)
2136 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2139 1. Return an SSA_NAME whose value is the address of the memory location of
2140 the first vector of the data reference.
2141 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2142 these statement(s) which define the returned SSA_NAME.
2144 FORNOW: We are only handling array accesses with step 1. */
2147 vect_create_addr_base_for_vector_ref (gimple stmt,
2148 gimple_seq *new_stmt_list,
2152 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2153 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2154 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2156 tree data_ref_base_var;
2158 tree addr_base, addr_expr;
2160 gimple_seq seq = NULL;
2161 tree base_offset = unshare_expr (DR_OFFSET (dr));
2162 tree init = unshare_expr (DR_INIT (dr));
2164 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2167 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2169 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2171 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2173 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2174 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2175 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2179 base_name = build_fold_indirect_ref (data_ref_base);
2182 base_offset = ssize_int (0);
2183 init = ssize_int (0);
2184 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2187 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2188 add_referenced_var (data_ref_base_var);
2189 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2191 gimple_seq_add_seq (new_stmt_list, seq);
2193 /* Create base_offset */
2194 base_offset = size_binop (PLUS_EXPR,
2195 fold_convert (sizetype, base_offset),
2196 fold_convert (sizetype, init));
2197 dest = create_tmp_var (sizetype, "base_off");
2198 add_referenced_var (dest);
2199 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2200 gimple_seq_add_seq (new_stmt_list, seq);
2204 tree tmp = create_tmp_var (sizetype, "offset");
2206 add_referenced_var (tmp);
2207 offset = fold_build2 (MULT_EXPR, sizetype,
2208 fold_convert (sizetype, offset), step);
2209 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2210 base_offset, offset);
2211 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2212 gimple_seq_add_seq (new_stmt_list, seq);
2215 /* base + base_offset */
2217 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2218 data_ref_base, base_offset);
2221 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2222 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2224 addr_base = build1 (ADDR_EXPR,
2225 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2226 unshare_expr (DR_REF (dr)));
2229 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2231 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2232 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2233 get_name (base_name));
2234 add_referenced_var (addr_expr);
2235 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2236 gimple_seq_add_seq (new_stmt_list, seq);
2238 if (vect_print_dump_info (REPORT_DETAILS))
2240 fprintf (vect_dump, "created ");
2241 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2248 /* Function vect_create_data_ref_ptr.
2250 Create a new pointer to vector type (vp), that points to the first location
2251 accessed in the loop by STMT, along with the def-use update chain to
2252 appropriately advance the pointer through the loop iterations. Also set
2253 aliasing information for the pointer. This vector pointer is used by the
2254 callers to this function to create a memory reference expression for vector
2258 1. STMT: a stmt that references memory. Expected to be of the form
2259 GIMPLE_ASSIGN <name, data-ref> or
2260 GIMPLE_ASSIGN <data-ref, name>.
2261 2. AT_LOOP: the loop where the vector memref is to be created.
2262 3. OFFSET (optional): an offset to be added to the initial address accessed
2263 by the data-ref in STMT.
2264 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2265 pointing to the initial address.
2266 5. TYPE: if not NULL indicates the required type of the data-ref.
2269 1. Declare a new ptr to vector_type, and have it point to the base of the
2270 data reference (initial addressed accessed by the data reference).
2271 For example, for vector of type V8HI, the following code is generated:
2274 vp = (v8hi *)initial_address;
2276 if OFFSET is not supplied:
2277 initial_address = &a[init];
2278 if OFFSET is supplied:
2279 initial_address = &a[init + OFFSET];
2281 Return the initial_address in INITIAL_ADDRESS.
2283 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2284 update the pointer in each iteration of the loop.
2286 Return the increment stmt that updates the pointer in PTR_INCR.
2288 3. Set INV_P to true if the access pattern of the data reference in the
2289 vectorized loop is invariant. Set it to false otherwise.
2291 4. Return the pointer. */
2294 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2295 tree offset, tree *initial_address, gimple *ptr_incr,
2296 bool only_init, bool *inv_p)
2299 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2300 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2301 struct loop *loop = NULL;
2302 bool nested_in_vect_loop = false;
2303 struct loop *containing_loop = NULL;
2304 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2309 gimple_seq new_stmt_list = NULL;
2313 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2315 gimple_stmt_iterator incr_gsi;
2317 tree indx_before_incr, indx_after_incr;
2320 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2321 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2325 loop = LOOP_VINFO_LOOP (loop_vinfo);
2326 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2327 containing_loop = (gimple_bb (stmt))->loop_father;
2328 pe = loop_preheader_edge (loop);
2332 gcc_assert (bb_vinfo);
2337 /* Check the step (evolution) of the load in LOOP, and record
2338 whether it's invariant. */
2339 if (nested_in_vect_loop)
2340 step = STMT_VINFO_DR_STEP (stmt_info);
2342 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2344 if (tree_int_cst_compare (step, size_zero_node) == 0)
2349 /* Create an expression for the first address accessed by this load
2351 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2353 if (vect_print_dump_info (REPORT_DETAILS))
2355 tree data_ref_base = base_name;
2356 fprintf (vect_dump, "create vector-pointer variable to type: ");
2357 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2358 if (TREE_CODE (data_ref_base) == VAR_DECL
2359 || TREE_CODE (data_ref_base) == ARRAY_REF)
2360 fprintf (vect_dump, " vectorizing an array ref: ");
2361 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2362 fprintf (vect_dump, " vectorizing a record based array ref: ");
2363 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2364 fprintf (vect_dump, " vectorizing a pointer ref: ");
2365 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2368 /** (1) Create the new vector-pointer variable: **/
2369 vect_ptr_type = build_pointer_type (vectype);
2370 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2371 get_name (base_name));
2373 /* Vector types inherit the alias set of their component type by default so
2374 we need to use a ref-all pointer if the data reference does not conflict
2375 with the created vector data reference because it is not addressable. */
2376 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2377 get_alias_set (DR_REF (dr))))
2379 vect_ptr_type = build_pointer_type_for_mode (vectype, ptr_mode, true);
2380 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2381 get_name (base_name));
2384 /* Likewise for any of the data references in the stmt group. */
2385 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2387 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2390 tree lhs = gimple_assign_lhs (orig_stmt);
2391 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2392 get_alias_set (lhs)))
2395 = build_pointer_type_for_mode (vectype, ptr_mode, true);
2397 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2398 get_name (base_name));
2402 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2407 add_referenced_var (vect_ptr);
2409 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2410 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2411 def-use update cycles for the pointer: One relative to the outer-loop
2412 (LOOP), which is what steps (3) and (4) below do. The other is relative
2413 to the inner-loop (which is the inner-most loop containing the dataref),
2414 and this is done be step (5) below.
2416 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2417 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2418 redundant. Steps (3),(4) create the following:
2421 LOOP: vp1 = phi(vp0,vp2)
2427 If there is an inner-loop nested in loop, then step (5) will also be
2428 applied, and an additional update in the inner-loop will be created:
2431 LOOP: vp1 = phi(vp0,vp2)
2433 inner: vp3 = phi(vp1,vp4)
2434 vp4 = vp3 + inner_step
2440 /** (3) Calculate the initial address the vector-pointer, and set
2441 the vector-pointer to point to it before the loop: **/
2443 /* Create: (&(base[init_val+offset]) in the loop preheader. */
2445 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2451 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2452 gcc_assert (!new_bb);
2455 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2458 *initial_address = new_temp;
2460 /* Create: p = (vectype *) initial_base */
2461 vec_stmt = gimple_build_assign (vect_ptr,
2462 fold_convert (vect_ptr_type, new_temp));
2463 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2464 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2467 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2468 gcc_assert (!new_bb);
2471 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2473 /** (4) Handle the updating of the vector-pointer inside the loop.
2474 This is needed when ONLY_INIT is false, and also when AT_LOOP
2475 is the inner-loop nested in LOOP (during outer-loop vectorization).
2478 /* No update in loop is required. */
2479 if (only_init && (!loop_vinfo || at_loop == loop))
2481 /* Copy the points-to information if it exists. */
2482 if (DR_PTR_INFO (dr))
2483 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2484 vptr = vect_ptr_init;
2488 /* The step of the vector pointer is the Vector Size. */
2489 tree step = TYPE_SIZE_UNIT (vectype);
2490 /* One exception to the above is when the scalar step of the load in
2491 LOOP is zero. In this case the step here is also zero. */
2493 step = size_zero_node;
2495 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2497 create_iv (vect_ptr_init,
2498 fold_convert (vect_ptr_type, step),
2499 vect_ptr, loop, &incr_gsi, insert_after,
2500 &indx_before_incr, &indx_after_incr);
2501 incr = gsi_stmt (incr_gsi);
2502 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2504 /* Copy the points-to information if it exists. */
2505 if (DR_PTR_INFO (dr))
2507 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2508 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2513 vptr = indx_before_incr;
2516 if (!nested_in_vect_loop || only_init)
2520 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2521 nested in LOOP, if exists: **/
2523 gcc_assert (nested_in_vect_loop);
2526 standard_iv_increment_position (containing_loop, &incr_gsi,
2528 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2529 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2531 incr = gsi_stmt (incr_gsi);
2532 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2534 /* Copy the points-to information if it exists. */
2535 if (DR_PTR_INFO (dr))
2537 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2538 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2543 return indx_before_incr;
2550 /* Function bump_vector_ptr
2552 Increment a pointer (to a vector type) by vector-size. If requested,
2553 i.e. if PTR-INCR is given, then also connect the new increment stmt
2554 to the existing def-use update-chain of the pointer, by modifying
2555 the PTR_INCR as illustrated below:
2557 The pointer def-use update-chain before this function:
2558 DATAREF_PTR = phi (p_0, p_2)
2560 PTR_INCR: p_2 = DATAREF_PTR + step
2562 The pointer def-use update-chain after this function:
2563 DATAREF_PTR = phi (p_0, p_2)
2565 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2567 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2570 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2572 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2573 the loop. The increment amount across iterations is expected
2575 BSI - location where the new update stmt is to be placed.
2576 STMT - the original scalar memory-access stmt that is being vectorized.
2577 BUMP - optional. The offset by which to bump the pointer. If not given,
2578 the offset is assumed to be vector_size.
2580 Output: Return NEW_DATAREF_PTR as illustrated above.
2585 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2586 gimple stmt, tree bump)
2588 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2589 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2590 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2591 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2592 tree update = TYPE_SIZE_UNIT (vectype);
2595 use_operand_p use_p;
2596 tree new_dataref_ptr;
2601 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2602 dataref_ptr, update);
2603 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2604 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2605 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2607 /* Copy the points-to information if it exists. */
2608 if (DR_PTR_INFO (dr))
2609 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2612 return new_dataref_ptr;
2614 /* Update the vector-pointer's cross-iteration increment. */
2615 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2617 tree use = USE_FROM_PTR (use_p);
2619 if (use == dataref_ptr)
2620 SET_USE (use_p, new_dataref_ptr);
2622 gcc_assert (tree_int_cst_compare (use, update) == 0);
2625 return new_dataref_ptr;
2629 /* Function vect_create_destination_var.
2631 Create a new temporary of type VECTYPE. */
2634 vect_create_destination_var (tree scalar_dest, tree vectype)
2637 const char *new_name;
2639 enum vect_var_kind kind;
2641 kind = vectype ? vect_simple_var : vect_scalar_var;
2642 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2644 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2646 new_name = get_name (scalar_dest);
2649 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2650 add_referenced_var (vec_dest);
2655 /* Function vect_strided_store_supported.
2657 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2658 and FALSE otherwise. */
2661 vect_strided_store_supported (tree vectype)
2663 optab interleave_high_optab, interleave_low_optab;
2666 mode = (int) TYPE_MODE (vectype);
2668 /* Check that the operation is supported. */
2669 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2670 vectype, optab_default);
2671 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2672 vectype, optab_default);
2673 if (!interleave_high_optab || !interleave_low_optab)
2675 if (vect_print_dump_info (REPORT_DETAILS))
2676 fprintf (vect_dump, "no optab for interleave.");
2680 if (optab_handler (interleave_high_optab, mode)->insn_code
2682 || optab_handler (interleave_low_optab, mode)->insn_code
2683 == CODE_FOR_nothing)
2685 if (vect_print_dump_info (REPORT_DETAILS))
2686 fprintf (vect_dump, "interleave op not supported by target.");
2694 /* Function vect_permute_store_chain.
2696 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2697 a power of 2, generate interleave_high/low stmts to reorder the data
2698 correctly for the stores. Return the final references for stores in
2701 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2702 The input is 4 vectors each containing 8 elements. We assign a number to each
2703 element, the input sequence is:
2705 1st vec: 0 1 2 3 4 5 6 7
2706 2nd vec: 8 9 10 11 12 13 14 15
2707 3rd vec: 16 17 18 19 20 21 22 23
2708 4th vec: 24 25 26 27 28 29 30 31
2710 The output sequence should be:
2712 1st vec: 0 8 16 24 1 9 17 25
2713 2nd vec: 2 10 18 26 3 11 19 27
2714 3rd vec: 4 12 20 28 5 13 21 30
2715 4th vec: 6 14 22 30 7 15 23 31
2717 i.e., we interleave the contents of the four vectors in their order.
2719 We use interleave_high/low instructions to create such output. The input of
2720 each interleave_high/low operation is two vectors:
2723 the even elements of the result vector are obtained left-to-right from the
2724 high/low elements of the first vector. The odd elements of the result are
2725 obtained left-to-right from the high/low elements of the second vector.
2726 The output of interleave_high will be: 0 4 1 5
2727 and of interleave_low: 2 6 3 7
2730 The permutation is done in log LENGTH stages. In each stage interleave_high
2731 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2732 where the first argument is taken from the first half of DR_CHAIN and the
2733 second argument from it's second half.
2736 I1: interleave_high (1st vec, 3rd vec)
2737 I2: interleave_low (1st vec, 3rd vec)
2738 I3: interleave_high (2nd vec, 4th vec)
2739 I4: interleave_low (2nd vec, 4th vec)
2741 The output for the first stage is:
2743 I1: 0 16 1 17 2 18 3 19
2744 I2: 4 20 5 21 6 22 7 23
2745 I3: 8 24 9 25 10 26 11 27
2746 I4: 12 28 13 29 14 30 15 31
2748 The output of the second stage, i.e. the final result is:
2750 I1: 0 8 16 24 1 9 17 25
2751 I2: 2 10 18 26 3 11 19 27
2752 I3: 4 12 20 28 5 13 21 30
2753 I4: 6 14 22 30 7 15 23 31. */
2756 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2757 unsigned int length,
2759 gimple_stmt_iterator *gsi,
2760 VEC(tree,heap) **result_chain)
2762 tree perm_dest, vect1, vect2, high, low;
2764 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2768 enum tree_code high_code, low_code;
2770 scalar_dest = gimple_assign_lhs (stmt);
2772 /* Check that the operation is supported. */
2773 if (!vect_strided_store_supported (vectype))
2776 *result_chain = VEC_copy (tree, heap, dr_chain);
2778 for (i = 0; i < exact_log2 (length); i++)
2780 for (j = 0; j < length/2; j++)
2782 vect1 = VEC_index (tree, dr_chain, j);
2783 vect2 = VEC_index (tree, dr_chain, j+length/2);
2785 /* Create interleaving stmt:
2786 in the case of big endian:
2787 high = interleave_high (vect1, vect2)
2788 and in the case of little endian:
2789 high = interleave_low (vect1, vect2). */
2790 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2791 DECL_GIMPLE_REG_P (perm_dest) = 1;
2792 add_referenced_var (perm_dest);
2793 if (BYTES_BIG_ENDIAN)
2795 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2796 low_code = VEC_INTERLEAVE_LOW_EXPR;
2800 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2801 high_code = VEC_INTERLEAVE_LOW_EXPR;
2803 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2805 high = make_ssa_name (perm_dest, perm_stmt);
2806 gimple_assign_set_lhs (perm_stmt, high);
2807 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2808 VEC_replace (tree, *result_chain, 2*j, high);
2810 /* Create interleaving stmt:
2811 in the case of big endian:
2812 low = interleave_low (vect1, vect2)
2813 and in the case of little endian:
2814 low = interleave_high (vect1, vect2). */
2815 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2816 DECL_GIMPLE_REG_P (perm_dest) = 1;
2817 add_referenced_var (perm_dest);
2818 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2820 low = make_ssa_name (perm_dest, perm_stmt);
2821 gimple_assign_set_lhs (perm_stmt, low);
2822 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2823 VEC_replace (tree, *result_chain, 2*j+1, low);
2825 dr_chain = VEC_copy (tree, heap, *result_chain);
2830 /* Function vect_setup_realignment
2832 This function is called when vectorizing an unaligned load using
2833 the dr_explicit_realign[_optimized] scheme.
2834 This function generates the following code at the loop prolog:
2837 x msq_init = *(floor(p)); # prolog load
2838 realignment_token = call target_builtin;
2840 x msq = phi (msq_init, ---)
2842 The stmts marked with x are generated only for the case of
2843 dr_explicit_realign_optimized.
2845 The code above sets up a new (vector) pointer, pointing to the first
2846 location accessed by STMT, and a "floor-aligned" load using that pointer.
2847 It also generates code to compute the "realignment-token" (if the relevant
2848 target hook was defined), and creates a phi-node at the loop-header bb
2849 whose arguments are the result of the prolog-load (created by this
2850 function) and the result of a load that takes place in the loop (to be
2851 created by the caller to this function).
2853 For the case of dr_explicit_realign_optimized:
2854 The caller to this function uses the phi-result (msq) to create the
2855 realignment code inside the loop, and sets up the missing phi argument,
2858 msq = phi (msq_init, lsq)
2859 lsq = *(floor(p')); # load in loop
2860 result = realign_load (msq, lsq, realignment_token);
2862 For the case of dr_explicit_realign:
2864 msq = *(floor(p)); # load in loop
2866 lsq = *(floor(p')); # load in loop
2867 result = realign_load (msq, lsq, realignment_token);
2870 STMT - (scalar) load stmt to be vectorized. This load accesses
2871 a memory location that may be unaligned.
2872 BSI - place where new code is to be inserted.
2873 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2877 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2878 target hook, if defined.
2879 Return value - the result of the loop-header phi node. */
2882 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2883 tree *realignment_token,
2884 enum dr_alignment_support alignment_support_scheme,
2886 struct loop **at_loop)
2888 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2889 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2890 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2891 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2893 tree scalar_dest = gimple_assign_lhs (stmt);
2900 tree msq_init = NULL_TREE;
2903 tree msq = NULL_TREE;
2904 gimple_seq stmts = NULL;
2906 bool compute_in_loop = false;
2907 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2908 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2909 struct loop *loop_for_initial_load;
2911 gcc_assert (alignment_support_scheme == dr_explicit_realign
2912 || alignment_support_scheme == dr_explicit_realign_optimized);
2914 /* We need to generate three things:
2915 1. the misalignment computation
2916 2. the extra vector load (for the optimized realignment scheme).
2917 3. the phi node for the two vectors from which the realignment is
2918 done (for the optimized realignment scheme).
2921 /* 1. Determine where to generate the misalignment computation.
2923 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2924 calculation will be generated by this function, outside the loop (in the
2925 preheader). Otherwise, INIT_ADDR had already been computed for us by the
2926 caller, inside the loop.
2928 Background: If the misalignment remains fixed throughout the iterations of
2929 the loop, then both realignment schemes are applicable, and also the
2930 misalignment computation can be done outside LOOP. This is because we are
2931 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2932 are a multiple of VS (the Vector Size), and therefore the misalignment in
2933 different vectorized LOOP iterations is always the same.
2934 The problem arises only if the memory access is in an inner-loop nested
2935 inside LOOP, which is now being vectorized using outer-loop vectorization.
2936 This is the only case when the misalignment of the memory access may not
2937 remain fixed throughout the iterations of the inner-loop (as explained in
2938 detail in vect_supportable_dr_alignment). In this case, not only is the
2939 optimized realignment scheme not applicable, but also the misalignment
2940 computation (and generation of the realignment token that is passed to
2941 REALIGN_LOAD) have to be done inside the loop.
2943 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2944 or not, which in turn determines if the misalignment is computed inside
2945 the inner-loop, or outside LOOP. */
2947 if (init_addr != NULL_TREE)
2949 compute_in_loop = true;
2950 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2954 /* 2. Determine where to generate the extra vector load.
2956 For the optimized realignment scheme, instead of generating two vector
2957 loads in each iteration, we generate a single extra vector load in the
2958 preheader of the loop, and in each iteration reuse the result of the
2959 vector load from the previous iteration. In case the memory access is in
2960 an inner-loop nested inside LOOP, which is now being vectorized using
2961 outer-loop vectorization, we need to determine whether this initial vector
2962 load should be generated at the preheader of the inner-loop, or can be
2963 generated at the preheader of LOOP. If the memory access has no evolution
2964 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2965 to be generated inside LOOP (in the preheader of the inner-loop). */
2967 if (nested_in_vect_loop)
2969 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2970 bool invariant_in_outerloop =
2971 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2972 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2975 loop_for_initial_load = loop;
2977 *at_loop = loop_for_initial_load;
2979 /* 3. For the case of the optimized realignment, create the first vector
2980 load at the loop preheader. */
2982 if (alignment_support_scheme == dr_explicit_realign_optimized)
2984 /* Create msq_init = *(floor(p1)) in the loop preheader */
2986 gcc_assert (!compute_in_loop);
2987 pe = loop_preheader_edge (loop_for_initial_load);
2988 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2989 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
2990 &init_addr, &inc, true, &inv_p);
2991 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2992 new_stmt = gimple_build_assign (vec_dest, data_ref);
2993 new_temp = make_ssa_name (vec_dest, new_stmt);
2994 gimple_assign_set_lhs (new_stmt, new_temp);
2995 mark_symbols_for_renaming (new_stmt);
2996 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2997 gcc_assert (!new_bb);
2998 msq_init = gimple_assign_lhs (new_stmt);
3001 /* 4. Create realignment token using a target builtin, if available.
3002 It is done either inside the containing loop, or before LOOP (as
3003 determined above). */
3005 if (targetm.vectorize.builtin_mask_for_load)
3009 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
3010 if (compute_in_loop)
3011 gcc_assert (init_addr); /* already computed by the caller. */
3014 /* Generate the INIT_ADDR computation outside LOOP. */
3015 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3017 pe = loop_preheader_edge (loop);
3018 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3019 gcc_assert (!new_bb);
3022 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3023 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3025 vect_create_destination_var (scalar_dest,
3026 gimple_call_return_type (new_stmt));
3027 new_temp = make_ssa_name (vec_dest, new_stmt);
3028 gimple_call_set_lhs (new_stmt, new_temp);
3030 if (compute_in_loop)
3031 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3034 /* Generate the misalignment computation outside LOOP. */
3035 pe = loop_preheader_edge (loop);
3036 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3037 gcc_assert (!new_bb);
3040 *realignment_token = gimple_call_lhs (new_stmt);
3042 /* The result of the CALL_EXPR to this builtin is determined from
3043 the value of the parameter and no global variables are touched
3044 which makes the builtin a "const" function. Requiring the
3045 builtin to have the "const" attribute makes it unnecessary
3046 to call mark_call_clobbered. */
3047 gcc_assert (TREE_READONLY (builtin_decl));
3050 if (alignment_support_scheme == dr_explicit_realign)
3053 gcc_assert (!compute_in_loop);
3054 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3057 /* 5. Create msq = phi <msq_init, lsq> in loop */
3059 pe = loop_preheader_edge (containing_loop);
3060 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3061 msq = make_ssa_name (vec_dest, NULL);
3062 phi_stmt = create_phi_node (msq, containing_loop->header);
3063 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3064 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3070 /* Function vect_strided_load_supported.
3072 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3073 and FALSE otherwise. */
3076 vect_strided_load_supported (tree vectype)
3078 optab perm_even_optab, perm_odd_optab;
3081 mode = (int) TYPE_MODE (vectype);
3083 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3085 if (!perm_even_optab)
3087 if (vect_print_dump_info (REPORT_DETAILS))
3088 fprintf (vect_dump, "no optab for perm_even.");
3092 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3094 if (vect_print_dump_info (REPORT_DETAILS))
3095 fprintf (vect_dump, "perm_even op not supported by target.");
3099 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3101 if (!perm_odd_optab)
3103 if (vect_print_dump_info (REPORT_DETAILS))
3104 fprintf (vect_dump, "no optab for perm_odd.");
3108 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3110 if (vect_print_dump_info (REPORT_DETAILS))
3111 fprintf (vect_dump, "perm_odd op not supported by target.");
3118 /* Function vect_permute_load_chain.
3120 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3121 a power of 2, generate extract_even/odd stmts to reorder the input data
3122 correctly. Return the final references for loads in RESULT_CHAIN.
3124 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3125 The input is 4 vectors each containing 8 elements. We assign a number to each
3126 element, the input sequence is:
3128 1st vec: 0 1 2 3 4 5 6 7
3129 2nd vec: 8 9 10 11 12 13 14 15
3130 3rd vec: 16 17 18 19 20 21 22 23
3131 4th vec: 24 25 26 27 28 29 30 31
3133 The output sequence should be:
3135 1st vec: 0 4 8 12 16 20 24 28
3136 2nd vec: 1 5 9 13 17 21 25 29
3137 3rd vec: 2 6 10 14 18 22 26 30
3138 4th vec: 3 7 11 15 19 23 27 31
3140 i.e., the first output vector should contain the first elements of each
3141 interleaving group, etc.
3143 We use extract_even/odd instructions to create such output. The input of each
3144 extract_even/odd operation is two vectors
3148 and the output is the vector of extracted even/odd elements. The output of
3149 extract_even will be: 0 2 4 6
3150 and of extract_odd: 1 3 5 7
3153 The permutation is done in log LENGTH stages. In each stage extract_even and
3154 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3155 order. In our example,
3157 E1: extract_even (1st vec, 2nd vec)
3158 E2: extract_odd (1st vec, 2nd vec)
3159 E3: extract_even (3rd vec, 4th vec)
3160 E4: extract_odd (3rd vec, 4th vec)
3162 The output for the first stage will be:
3164 E1: 0 2 4 6 8 10 12 14
3165 E2: 1 3 5 7 9 11 13 15
3166 E3: 16 18 20 22 24 26 28 30
3167 E4: 17 19 21 23 25 27 29 31
3169 In order to proceed and create the correct sequence for the next stage (or
3170 for the correct output, if the second stage is the last one, as in our
3171 example), we first put the output of extract_even operation and then the
3172 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3173 The input for the second stage is:
3175 1st vec (E1): 0 2 4 6 8 10 12 14
3176 2nd vec (E3): 16 18 20 22 24 26 28 30
3177 3rd vec (E2): 1 3 5 7 9 11 13 15
3178 4th vec (E4): 17 19 21 23 25 27 29 31
3180 The output of the second stage:
3182 E1: 0 4 8 12 16 20 24 28
3183 E2: 2 6 10 14 18 22 26 30
3184 E3: 1 5 9 13 17 21 25 29
3185 E4: 3 7 11 15 19 23 27 31
3187 And RESULT_CHAIN after reordering:
3189 1st vec (E1): 0 4 8 12 16 20 24 28
3190 2nd vec (E3): 1 5 9 13 17 21 25 29
3191 3rd vec (E2): 2 6 10 14 18 22 26 30
3192 4th vec (E4): 3 7 11 15 19 23 27 31. */
3195 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3196 unsigned int length,
3198 gimple_stmt_iterator *gsi,
3199 VEC(tree,heap) **result_chain)
3201 tree perm_dest, data_ref, first_vect, second_vect;
3203 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3207 /* Check that the operation is supported. */
3208 if (!vect_strided_load_supported (vectype))
3211 *result_chain = VEC_copy (tree, heap, dr_chain);
3212 for (i = 0; i < exact_log2 (length); i++)
3214 for (j = 0; j < length; j +=2)
3216 first_vect = VEC_index (tree, dr_chain, j);
3217 second_vect = VEC_index (tree, dr_chain, j+1);
3219 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3220 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3221 DECL_GIMPLE_REG_P (perm_dest) = 1;
3222 add_referenced_var (perm_dest);
3224 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3225 perm_dest, first_vect,
3228 data_ref = make_ssa_name (perm_dest, perm_stmt);
3229 gimple_assign_set_lhs (perm_stmt, data_ref);
3230 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3231 mark_symbols_for_renaming (perm_stmt);
3233 VEC_replace (tree, *result_chain, j/2, data_ref);
3235 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3236 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3237 DECL_GIMPLE_REG_P (perm_dest) = 1;
3238 add_referenced_var (perm_dest);
3240 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3241 perm_dest, first_vect,
3243 data_ref = make_ssa_name (perm_dest, perm_stmt);
3244 gimple_assign_set_lhs (perm_stmt, data_ref);
3245 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3246 mark_symbols_for_renaming (perm_stmt);
3248 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3250 dr_chain = VEC_copy (tree, heap, *result_chain);
3256 /* Function vect_transform_strided_load.
3258 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3259 to perform their permutation and ascribe the result vectorized statements to
3260 the scalar statements.
3264 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3265 gimple_stmt_iterator *gsi)
3267 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3268 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3269 gimple next_stmt, new_stmt;
3270 VEC(tree,heap) *result_chain = NULL;
3271 unsigned int i, gap_count;
3274 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3275 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3276 vectors, that are ready for vector computation. */
3277 result_chain = VEC_alloc (tree, heap, size);
3279 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3282 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3283 Since we scan the chain starting from it's first node, their order
3284 corresponds the order of data-refs in RESULT_CHAIN. */
3285 next_stmt = first_stmt;
3287 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3292 /* Skip the gaps. Loads created for the gaps will be removed by dead
3293 code elimination pass later. No need to check for the first stmt in
3294 the group, since it always exists.
3295 DR_GROUP_GAP is the number of steps in elements from the previous
3296 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3297 correspond to the gaps.
3299 if (next_stmt != first_stmt
3300 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3308 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3309 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3310 copies, and we put the new vector statement in the first available
3312 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3313 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3316 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3319 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3321 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3324 prev_stmt = rel_stmt;
3326 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3329 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3334 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3336 /* If NEXT_STMT accesses the same DR as the previous statement,
3337 put the same TMP_DATA_REF as its vectorized statement; otherwise
3338 get the next data-ref from RESULT_CHAIN. */
3339 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3344 VEC_free (tree, heap, result_chain);
3348 /* Function vect_force_dr_alignment_p.
3350 Returns whether the alignment of a DECL can be forced to be aligned
3351 on ALIGNMENT bit boundary. */
3354 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3356 if (TREE_CODE (decl) != VAR_DECL)
3359 if (DECL_EXTERNAL (decl))
3362 if (TREE_ASM_WRITTEN (decl))
3365 if (TREE_STATIC (decl))
3366 return (alignment <= MAX_OFILE_ALIGNMENT);
3368 return (alignment <= MAX_STACK_ALIGNMENT);
3371 /* Function vect_supportable_dr_alignment
3373 Return whether the data reference DR is supported with respect to its
3376 enum dr_alignment_support
3377 vect_supportable_dr_alignment (struct data_reference *dr)
3379 gimple stmt = DR_STMT (dr);
3380 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3381 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3382 enum machine_mode mode = TYPE_MODE (vectype);
3383 bool invariant_in_outerloop = false;
3384 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3385 struct loop *vect_loop = NULL;
3386 bool nested_in_vect_loop = false;
3388 if (aligned_access_p (dr))
3392 /* FORNOW: Misaligned accesses are supported only in loops. */
3393 return dr_unaligned_unsupported;
3395 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3396 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3398 if (nested_in_vect_loop)
3400 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3401 invariant_in_outerloop =
3402 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3405 /* Possibly unaligned access. */
3407 /* We can choose between using the implicit realignment scheme (generating
3408 a misaligned_move stmt) and the explicit realignment scheme (generating
3409 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3410 realignment scheme: optimized, and unoptimized.
3411 We can optimize the realignment only if the step between consecutive
3412 vector loads is equal to the vector size. Since the vector memory
3413 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3414 is guaranteed that the misalignment amount remains the same throughout the
3415 execution of the vectorized loop. Therefore, we can create the
3416 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3417 at the loop preheader.
3419 However, in the case of outer-loop vectorization, when vectorizing a
3420 memory access in the inner-loop nested within the LOOP that is now being
3421 vectorized, while it is guaranteed that the misalignment of the
3422 vectorized memory access will remain the same in different outer-loop
3423 iterations, it is *not* guaranteed that is will remain the same throughout
3424 the execution of the inner-loop. This is because the inner-loop advances
3425 with the original scalar step (and not in steps of VS). If the inner-loop
3426 step happens to be a multiple of VS, then the misalignment remains fixed
3427 and we can use the optimized realignment scheme. For example:
3433 When vectorizing the i-loop in the above example, the step between
3434 consecutive vector loads is 1, and so the misalignment does not remain
3435 fixed across the execution of the inner-loop, and the realignment cannot
3436 be optimized (as illustrated in the following pseudo vectorized loop):
3438 for (i=0; i<N; i+=4)
3439 for (j=0; j<M; j++){
3440 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3441 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3442 // (assuming that we start from an aligned address).
3445 We therefore have to use the unoptimized realignment scheme:
3447 for (i=0; i<N; i+=4)
3448 for (j=k; j<M; j+=4)
3449 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3450 // that the misalignment of the initial address is
3453 The loop can then be vectorized as follows:
3455 for (k=0; k<4; k++){
3456 rt = get_realignment_token (&vp[k]);
3457 for (i=0; i<N; i+=4){
3459 for (j=k; j<M; j+=4){
3461 va = REALIGN_LOAD <v1,v2,rt>;
3468 if (DR_IS_READ (dr))
3470 bool is_packed = false;
3471 tree type = (TREE_TYPE (DR_REF (dr)));
3473 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3475 && (!targetm.vectorize.builtin_mask_for_load
3476 || targetm.vectorize.builtin_mask_for_load ()))
3478 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3479 if (nested_in_vect_loop
3480 && (TREE_INT_CST_LOW (DR_STEP (dr))
3481 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3482 return dr_explicit_realign;
3484 return dr_explicit_realign_optimized;
3486 if (!known_alignment_for_access_p (dr))
3488 tree ba = DR_BASE_OBJECT (dr);
3491 is_packed = contains_packed_reference (ba);
3494 if (targetm.vectorize.
3495 builtin_support_vector_misalignment (mode, type,
3496 DR_MISALIGNMENT (dr), is_packed))
3497 /* Can't software pipeline the loads, but can at least do them. */
3498 return dr_unaligned_supported;
3502 bool is_packed = false;
3503 tree type = (TREE_TYPE (DR_REF (dr)));
3505 if (!known_alignment_for_access_p (dr))
3507 tree ba = DR_BASE_OBJECT (dr);
3510 is_packed = contains_packed_reference (ba);
3513 if (targetm.vectorize.
3514 builtin_support_vector_misalignment (mode, type,
3515 DR_MISALIGNMENT (dr), is_packed))
3516 return dr_unaligned_supported;
3520 return dr_unaligned_unsupported;