/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "toplev.h"
/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different from the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always take
   invariants out of the loop, and so in the case of promotion we also have to
   check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
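
/* Illustrative sketch (added commentary, hypothetical loop body, not part
   of the pass): for

       short_x = short_p[i];     // 2-byte elements
       int_y = (int) short_x;    // promotion: lhs type is 4 bytes
       int_q[i] = int_y;

   the function above returns the 2-byte type for the promotion stmt, so
   the vectorization factor is derived from the narrowest element even
   though the lhs is wider.  */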
/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */

int
vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
{
  gimple next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    return -1;

  while (next_stmt && next_stmt != stmt)
    {
      result++;
      next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
    }

  if (next_stmt)
    return result;
  else
    return -1;
}
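
/* Example (added commentary): for a chain S1 -> S2 -> S3 with
   FIRST_STMT = S1, calling this function on S3 returns 2; it returns -1
   for a stmt whose DR_GROUP_FIRST_DR points at some other chain.  */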
/* Function vect_insert_into_interleaving_chain.

   Insert DRA into the interleaving chain of DRB according to DRA's INIT.  */

static void
vect_insert_into_interleaving_chain (struct data_reference *dra,
                                     struct data_reference *drb)
{
  gimple prev, next;
  tree next_init;
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));

  prev = DR_GROUP_FIRST_DR (stmtinfo_b);
  next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
  while (next)
    {
      next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
      if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
        {
          /* Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
          DR_GROUP_NEXT_DR (stmtinfo_a) = next;
          return;
        }
      prev = next;
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
    }

  /* We got to the end of the list.  Insert here.  */
  DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
  DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
}
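
/* Example (added commentary, hypothetical inits): given the chain with
   inits {0, 8} for a 4-byte type, inserting a data-ref with init 4 yields
   the chain {0, 4, 8}: the walk above stops at the first member whose
   init exceeds DR_INIT (dra) and links the new node before it.  */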
/* Function vect_update_interleaving_chain.

   For two data-refs DRA and DRB that are a part of a chain of interleaved data
   accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.

   There are four possible cases:
   1. New stmts - both DRA and DRB are not a part of any chain:
      FIRST_DR = DRB
      NEXT_DR (DRB) = DRA
   2. DRB is a part of a chain and DRA is not:
      no need to update FIRST_DR
      no need to insert DRB
      insert DRA according to init
   3. DRA is a part of a chain and DRB is not:
      if (init of FIRST_DR > init of DRB)
          FIRST_DR = DRB
          NEXT(FIRST_DR) = previous FIRST_DR
      else
          insert DRB according to its init
   4. both DRA and DRB are in some interleaving chains:
      choose the chain with the smallest init of FIRST_DR
      insert the nodes of the second chain into the first one.  */

static void
vect_update_interleaving_chain (struct data_reference *drb,
                                struct data_reference *dra)
{
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  tree next_init, init_dra_chain, init_drb_chain;
  gimple first_a, first_b;
  tree node_init;
  gimple node, prev, next, first_stmt;

  /* 1. New stmts - both DRA and DRB are not a part of any chain.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
      DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
      DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
      return;
    }

  /* 2. DRB is a part of a chain and DRA is not.  */
  if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
      /* Insert DRA into the chain of DRB.  */
      vect_insert_into_interleaving_chain (dra, drb);
      return;
    }

  /* 3. DRA is a part of a chain and DRB is not.  */
  if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
    {
      gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
      tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
                                                              old_first_stmt)));
      gimple tmp;

      if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
        {
          /* DRB's init is smaller than the init of the stmt previously marked
             as the first stmt of the interleaving chain of DRA.  Therefore, we
             update FIRST_STMT and put DRB in the head of the list.  */
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
          DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;

          /* Update all the stmts in the list to point to the new FIRST_STMT.  */
          tmp = old_first_stmt;
          while (tmp)
            {
              DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
              tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
            }
        }
      else
        {
          /* Insert DRB in the list of DRA.  */
          vect_insert_into_interleaving_chain (drb, dra);
          DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
        }
      return;
    }

  /* 4. both DRA and DRB are in some interleaving chains.  */
  first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
  first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
  if (first_a == first_b)
    return;
  init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
  init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));

  if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
    {
      /* Insert the nodes of DRA chain into the DRB chain.
         After inserting a node, continue from this node of the DRB chain (don't
         start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_a);
      prev = DR_GROUP_FIRST_DR (stmtinfo_b);
      first_stmt = first_b;
    }
  else
    {
      /* Insert the nodes of DRB chain into the DRA chain.
         After inserting a node, continue from this node of the DRA chain (don't
         start from the beginning).  */
      node = DR_GROUP_FIRST_DR (stmtinfo_b);
      prev = DR_GROUP_FIRST_DR (stmtinfo_a);
      first_stmt = first_a;
    }

  while (node)
    {
      node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
      next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
      while (next)
        {
          next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (next_init, node_init) > 0)
            {
              /* Insert here.  */
              DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
              DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
              prev = node;
              break;
            }
          prev = next;
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
        }
      if (!next)
        {
          /* We got to the end of the list.  Insert here.  */
          DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
          DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
          prev = node;
        }
      DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
      node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
    }
}
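
/* Worked example (added commentary, hypothetical inits): merging chain
   A = {16, 24} into chain B = {0, 8} produces the single sorted chain
   {0, 8, 16, 24}, with DR_GROUP_FIRST_DR of every member updated to the
   head of B; the inner walk resumes from the last insertion point rather
   than rescanning from the head.  */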
/* Function vect_equal_offsets.

   Check if OFFSET1 and OFFSET2 are identical expressions.  */

static bool
vect_equal_offsets (tree offset1, tree offset2)
{
  bool res0, res1;

  STRIP_NOPS (offset1);
  STRIP_NOPS (offset2);

  if (offset1 == offset2)
    return true;

  if (TREE_CODE (offset1) != TREE_CODE (offset2)
      || !BINARY_CLASS_P (offset1)
      || !BINARY_CLASS_P (offset2))
    return false;

  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
                             TREE_OPERAND (offset2, 0));
  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
                             TREE_OPERAND (offset2, 1));

  return (res0 && res1);
}
/* Function vect_check_interleaving.

   Check if DRA and DRB are a part of interleaving.  In case they are, insert
   DRA and DRB in an interleaving chain.  */

static void
vect_check_interleaving (struct data_reference *dra,
                         struct data_reference *drb)
{
  HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;

  /* Check that the data-refs have same first location (except init) and they
     are both either store or load (not load and store).  */
  if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
       && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
           || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
           || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
              != TREE_OPERAND (DR_BASE_ADDRESS (drb), 0)))
      || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
      || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
      || DR_IS_READ (dra) != DR_IS_READ (drb))
    return;

  /* Check:
     1. data-refs are of the same type
     2. their steps are equal
     3. the step (if greater than zero) is greater than the difference between
        data-refs' inits.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));
  step = TREE_INT_CST_LOW (DR_STEP (dra));

  if (init_a > init_b)
    {
      /* If init_a == init_b + the size of the type * k, we have an interleaving,
         and DRB is accessed before DRA.  */
      diff_mod_size = (init_a - init_b) % type_size_a;

      if ((init_a - init_b) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (drb, dra);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
  else
    {
      /* If init_b == init_a + the size of the type * k, we have an
         interleaving, and DRA is accessed before DRB.  */
      diff_mod_size = (init_b - init_a) % type_size_a;

      if ((init_b - init_a) > step)
        return;

      if (diff_mod_size == 0)
        {
          vect_update_interleaving_chain (dra, drb);
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected interleaving ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }
          return;
        }
    }
}
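
/* Worked example (added commentary, hypothetical accesses): two loads
   a[4*i] and a[4*i+2] of a 4-byte type have init_a = 0, init_b = 8 and
   step = 16, type_size = 4.  Then (init_b - init_a) = 8 <= step and
   8 % 4 == 0, so the two refs are chained as members of one
   interleaving group.  */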
/* Check if data references pointed by DR_I and DR_J are same or
   belong to same interleaving group.  Return FALSE if drs are
   different, otherwise return TRUE.  */

static bool
vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
{
  gimple stmt_i = DR_STMT (dr_i);
  gimple stmt_j = DR_STMT (dr_j);

  if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
      || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
          && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
          && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
              == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
    return true;
  else
    return false;
}
/* If address ranges represented by DDR_I and DDR_J are equal,
   return TRUE, otherwise return FALSE.  */

static bool
vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
{
  if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
       && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
      || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
          && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
    return true;
  else
    return false;
}
/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (vect_print_dump_info (REPORT_DR_DETAILS))
    {
      fprintf (vect_dump, "mark for run-time aliasing test between ");
      print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
      fprintf (vect_dump, " and ");
      print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not supported when optimizing for size.");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "versioning not yet supported for outer-loops.");
      return false;
    }

  VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
  return true;
}
/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
  int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
  lambda_vector dist_v;
  unsigned int loop_depth;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    {
      /* Independent data accesses.  */
      vect_check_interleaving (dra, drb);
      return false;
    }

  if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
    return false;

  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "versioning for alias required: can't determine dependence between ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
    {
      int dist = dist_v[loop_depth];

      if (vect_print_dump_info (REPORT_DR_DETAILS))
        fprintf (vect_dump, "dependence distance = %d.", dist);

      /* Same loop iteration.  */
      if (dist % vectorization_factor == 0 && dra_size == drb_size)
        {
          /* Two references with distance zero have the same alignment.  */
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
          VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "accesses have the same alignment.");
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
              print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
              fprintf (vect_dump, " and ");
              print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
            }

          /* For interleaving, mark that there is a read-write dependency if
             necessary.  We check before that one of the data-refs is a store.  */
          if (DR_IS_READ (dra))
            DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
          else
            {
              if (DR_IS_READ (drb))
                DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
            }

          continue;
        }

      if (abs (dist) >= vectorization_factor
          || (dist > 0 && DDR_REVERSED_P (ddr)))
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  If DDR_REVERSED_P the
             order of the data-refs in DDR was reversed (to make distance
             vector positive), and the actual distance is negative.  */
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            fprintf (vect_dump, "dependence distance >= VF or negative.");
          continue;
        }

      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        {
          fprintf (vect_dump,
                   "not vectorized, possible dependence "
                   "between data-refs ");
          print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
          fprintf (vect_dump, " and ");
          print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
        }

      return true;
    }

  return false;
}
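
/* Example of the distance test (added commentary, hypothetical refs):
   with VF = 4, a pair of refs with distance 0 is recorded as having the
   same alignment; a distance of 2 is positive but smaller than VF, so
   one vector iteration would span the dependence and vectorization is
   rejected; a distance >= 4 is harmless and the loop iteration
   continues.  */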
/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
  struct data_dependence_relation *ddr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_dependences ===");

  for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
      return false;

  return true;
}
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_compute_data_ref_alignment:");

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step divides the vector-size.");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "inner step doesn't divide the vector-size.");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        {
          fprintf (vect_dump, "Unknown alignment for access: ");
          print_generic_expr (vect_dump, base, TDF_SLIM);
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                      TREE_TYPE (base_addr)))),
                                   alignment) >= 0))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables if
         flag_section_anchors is enabled.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "can't force alignment of ref: ");
              print_generic_expr (vect_dump, ref, TDF_SLIM);
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "force alignment");
      DECL_ALIGN (base) = TYPE_ALIGN (vectype);
      DECL_USER_ALIGN (base) = 1;
    }

  /* At this point we assume that the base is aligned.  */
  gcc_assert (base_aligned
              || (TREE_CODE (base) == VAR_DECL
                  && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unexpected misalign value");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      print_generic_expr (vect_dump, ref, TDF_SLIM);
    }

  return true;
}
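
/* Misalignment arithmetic, for illustration (added commentary, hypothetical
   ref): with a V4SI vectype (16-byte vectors), a ref whose base is 16-byte
   aligned and whose DR_INIT is 4 gets DR_MISALIGNMENT = 4 bytes, i.e. the
   FLOOR_MOD (4, 16) computed above.  */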
/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_compute_data_ref_alignment (dr))
      return false;

  return true;
}
/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  VEC(dr_p,heap) *same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
  if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
    dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += npeel * dr_size;
      misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "Setting misalignment to -1.");
  SET_DR_MISALIGNMENT (dr, -1);
}
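
/* Example of the known-misalignment update (added commentary, hypothetical
   numbers): for a 4-byte ref with DR_MISALIGNMENT = 8 in a V4SI (16-byte
   vector) loop, peeling npeel = 2 iterations gives (8 + 2*4) % 16 = 0, so
   this ref becomes aligned along with DR_PEEL.  */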
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr);
      if (!supportable_dr_alignment)
        {
          if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
            {
              if (DR_IS_READ (dr))
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned load.");
              else
                fprintf (vect_dump,
                         "not vectorized: unsupported unaligned store.");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned
          && vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "Vectorizing an unaligned access.");
    }
  return true;
}
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if the number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = (TREE_TYPE (DR_REF (dr)));
      tree ba = DR_BASE_OBJECT (dr);
      bool is_packed = false;

      if (ba)
        is_packed = contains_packed_reference (ba);

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unknown misalignment, is_packed = %d", is_packed);
      if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
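
/* Example (added commentary, hypothetical group): for a group of 2
   interleaved 4-byte loads in a V4SI loop (nelements = 4), a misalignment
   of 1 element would need a prolog of 4 - 1 = 3 iterations; 3 % 2 != 0,
   so alignment is not reachable and the function above returns false.  */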
/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized; any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */
bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL;
  struct data_reference *dr;
  unsigned int i;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  int vect_versioning_for_alias_required;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned write then see if peeling to align this write
        can make all data references satisfy vect_supportable_dr_alignment.
        If so, update data structures as needed and return true.  Note that
        at this time vect_supportable_dr_alignment is known to return false
        for a misaligned write.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).

     The scheme we use FORNOW: peel to force the alignment of the first
     misaligned store in the loop.
     Rationale: misaligned stores are not yet supported.

     TODO: Use a cost model.  */

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
          && DR_GROUP_FIRST_DR (stmt_info) != stmt)
        continue;

      if (!DR_IS_READ (dr) && !aligned_access_p (dr))
        {
          do_peeling = vector_alignment_reachable_p (dr);
          if (do_peeling)
            dr0 = dr;
          if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "vector alignment may not be reachable");
          break;
        }
    }

  vect_versioning_for_alias_required =
    (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);

  /* Temporarily, if versioning for alias is required, we disable peeling
     until we support peeling and versioning.  Often peeling for alignment
     will require peeling for loop-bound, which in turn requires that we
     know how to adjust the loop ivs after the loop.  */
  if (vect_versioning_for_alias_required
      || !vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
    do_peeling = false;

  if (do_peeling)
    {
      int mis;
      int npeel = 0;
      gimple stmt = DR_STMT (dr0);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      if (known_alignment_for_access_p (dr0))
        {
          /* Since it's known at compile time, compute the number of iterations
             in the peeled loop (the peeling factor) for use in updating
             DR_MISALIGNMENT values.  The peeling factor is the vectorization
             factor minus the misalignment as an element count.  */
          mis = DR_MISALIGNMENT (dr0);
          mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
          npeel = nelements - mis;

          /* For interleaved data access every iteration accesses all the
             members of the group, therefore we divide the number of iterations
             by the group size.  */
          stmt_info = vinfo_for_stmt (DR_STMT (dr0));
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
            npeel /= DR_GROUP_SIZE (stmt_info);

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Try peeling by %d", npeel);
        }

      /* Ensure that all data refs can be vectorized after the peel.  */
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          int save_misalignment;

          if (dr == dr0)
            continue;

          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);
          /* For interleaving, only the alignment of the first access
             matters.  */
          if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
              && DR_GROUP_FIRST_DR (stmt_info) != stmt)
            continue;

          save_misalignment = DR_MISALIGNMENT (dr);
          vect_update_misalignment_for_peel (dr, dr0, npeel);
          supportable_dr_alignment = vect_supportable_dr_alignment (dr);
          SET_DR_MISALIGNMENT (dr, save_misalignment);

          if (!supportable_dr_alignment)
            {
              do_peeling = false;
              break;
            }
        }

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
          for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
            if (dr != dr0)
              vect_update_misalignment_for_peel (dr, dr0, npeel);

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
          LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
          SET_DR_MISALIGNMENT (dr0, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using peeling.");

          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "Peeling for alignment will be applied.");

          stat = vect_verify_datarefs_alignment (loop_vinfo);
          gcc_assert (stat);
          return stat;
        }
    }

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) flag_tree_vect_loop_version is TRUE
     2) optimize loop for speed
     3) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     4) all misaligned data refs with a known misalignment are supported, and
     5) the number of runtime alignment checks is within reason.  */

  do_versioning =
    flag_tree_vect_loop_version
    && optimize_loop_nest_for_speed_p (loop)
    && (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
        {
          stmt = DR_STMT (dr);
          stmt_info = vinfo_for_stmt (stmt);

          /* For interleaving, only the alignment of the first access
             matters.  */
          if (aligned_access_p (dr)
              || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
                  && DR_GROUP_FIRST_DR (stmt_info) != stmt))
            continue;

          supportable_dr_alignment = vect_supportable_dr_alignment (dr);

          if (!supportable_dr_alignment)
            {
              gimple stmt;
              int mask;
              tree vectype;

              if (known_alignment_for_access_p (dr)
                  || VEC_length (gimple,
                                 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

              stmt = DR_STMT (dr);
              vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
              gcc_assert (vectype);

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf.  */
              mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
              VEC_safe_push (gimple, heap,
                             LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
                             DR_STMT (dr));
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
        do_versioning = false;
      else if (!do_versioning)
        VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
    }

  if (do_versioning)
    {
      VEC(gimple,heap) *may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      gimple stmt;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
        {
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          dr = STMT_VINFO_DATA_REF (stmt_info);
          SET_DR_MISALIGNMENT (dr, 0);
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "Alignment of access forced using versioning.");
        }

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Versioning for alignment will be applied.");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");

  if (!vect_compute_data_refs_alignment (loop_vinfo))
    {
      if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
        fprintf (vect_dump,
                 "not vectorized: can't calculate alignment for data ref.");
      return false;
    }

  return true;
}
/* Analyze groups of strided accesses: check that DR belongs to a group of
   strided accesses of legal size, step, etc.  Detect gaps, single element
   interleaving, and other special cases.  Set strided access info.
   Collect groups of strided stores for further use in SLP analysis.  */

static bool
vect_analyze_group_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
  HOST_WIDE_INT stride;
  bool slp_impossible = false;

  /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
     interleaving group (including gaps).  */
  stride = dr_step / type_size;

  /* A non-consecutive access is possible only if it is a part of interleaving.  */
  if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
    {
      /* Check if this DR is a part of interleaving, and is a single
         element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads.  STEP must be a multiple of the type
         size.  The size of the group must be a power of 2.  */
      if (DR_IS_READ (dr)
          && (dr_step % type_size) == 0
          && stride > 0
          && exact_log2 (stride) != -1)
        {
          DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
          DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
          if (vect_print_dump_info (REPORT_DR_DETAILS))
            {
              fprintf (vect_dump, "Detected single element interleaving %d ",
                       DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
              fprintf (vect_dump, " step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
            }
          return true;
        }
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not consecutive access");
      return false;
    }

  if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
    {
      /* First stmt in the interleaving chain.  Check the chain.  */
      gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree next_step;
      tree prev_init = DR_INIT (data_ref);
      gimple prev = stmt;
      HOST_WIDE_INT diff, count_in_bytes;

      while (next)
        {
          /* Skip same data-refs.  In case that two or more stmts share a data-ref
             (supported only for loads), we vectorize only the first stmt, and
             the rest get their vectorized loads from the first one.  */
          if (!tree_int_cst_compare (DR_INIT (data_ref),
                                     DR_INIT (STMT_VINFO_DATA_REF (
                                                      vinfo_for_stmt (next)))))
            {
              if (!DR_IS_READ (data_ref))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "Two store stmts share the same dr.");
                  return false;
                }

              /* Check that there are no load-store dependencies for these loads
                 to prevent a case of load-store-load to the same location.  */
              if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
                  || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump,
                             "READ_WRITE dependence in interleaving.");
                  return false;
                }

              /* For load use the same data-ref load.  */
              DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;

              prev = next;
              next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
              continue;
            }
          prev = next;

          /* Check that all the accesses have the same STEP.  */
          next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
          if (tree_int_cst_compare (step, next_step))
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "not consecutive access in interleaving");
              return false;
            }

          data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
          /* Check that the distance between two accesses is equal to the type
             size.  Otherwise, we have gaps.  */
          diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
                  - TREE_INT_CST_LOW (prev_init)) / type_size;
          if (diff != 1)
            {
              /* FORNOW: SLP of accesses with gaps is not supported.  */
              slp_impossible = true;
              if (!DR_IS_READ (data_ref))
                {
                  if (vect_print_dump_info (REPORT_DETAILS))
                    fprintf (vect_dump, "interleaved store with gaps");
                  return false;
                }
            }

          /* Store the gap from the previous member of the group.  If there is no
             gap in the access, DR_GROUP_GAP is always 1.  */
          DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;

          prev_init = DR_INIT (data_ref);
          next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
          /* Count the number of data-refs in the chain.  */
          count++;
        }

      /* COUNT is the number of accesses found, we multiply it by the size of
         the type to get COUNT_IN_BYTES.  */
      count_in_bytes = type_size * count;

      /* Check that the size of the interleaving is not greater than STEP.  */
      if (dr_step < count_in_bytes)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "interleaving size is greater than step for ");
              print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
            }
          return false;
        }

      /* Check that the size of the interleaving is equal to STEP for stores,
         i.e., that there are no gaps.  */
      if (dr_step != count_in_bytes)
        {
          if (DR_IS_READ (dr))
            {
              slp_impossible = true;
              /* There is a gap after the last load in the group.  This gap is a
                 difference between the stride and the number of elements.  When
                 there is no gap, this difference should be 0.  */
              DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
            }
          else
            {
              if (vect_print_dump_info (REPORT_DETAILS))
                fprintf (vect_dump, "interleaved store with gaps");
              return false;
            }
        }

      /* Check that STEP is a multiple of type size.  */
      if ((dr_step % type_size) != 0)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            {
              fprintf (vect_dump, "step is not a multiple of type size: step ");
              print_generic_expr (vect_dump, step, TDF_SLIM);
              fprintf (vect_dump, " size ");
              print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
                                  TDF_SLIM);
            }
          return false;
        }

      /* FORNOW: we handle only interleaving that is a power of 2.
         We don't fail here if it may be still possible to vectorize the
         group using SLP.  If not, the size of the group will be checked in
         vect_analyze_operations, and the vectorization will fail.  */
      if (exact_log2 (stride) == -1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "interleaving is not a power of 2");

          if (slp_impossible)
            return false;
        }
      DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);

      /* SLP: create an SLP data structure for every interleaving group of
         stores for further analysis in vect_analyse_slp.  */
      if (!DR_IS_READ (dr) && !slp_impossible)
        VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
    }

  return true;
}
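
/* Example of a legal load group with a gap (added commentary, hypothetical
   accesses): for loads a[4*i] and a[4*i+2] of a 4-byte type,
   stride = 16/4 = 4 and count = 2.  DR_GROUP_GAP of the second member is
   (8 - 0)/4 = 2, and DR_GROUP_GAP of the first stmt records the trailing
   gap stride - count = 2.  SLP is marked impossible, but loop
   vectorization of the group may still proceed.  */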
/* Analyze the access pattern of the data-reference DR.
   In case of non-consecutive accesses call vect_analyze_group_access() to
   analyze groups of strided accesses.  */

static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

  if (!step)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "bad data-ref access");
      return false;
    }

  /* Don't allow invariant accesses.  */
  if (dr_step == 0)
    return false;

  if (nested_in_vect_loop_p (loop, stmt))
    {
      /* Interleaved accesses are not yet supported within outer-loop
         vectorization for references in the inner-loop.  */
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      dr_step = TREE_INT_CST_LOW (step);

      if (dr_step == 0)
        {
          if (vect_print_dump_info (REPORT_ALIGNMENT))
            fprintf (vect_dump, "zero step in outer loop.");
          if (DR_IS_READ (dr))
            return true;
          else
            return false;
        }
    }

  /* Consecutive?  */
  if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
    {
      /* Mark that it is not interleaving.  */
      DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
      return true;
    }

  if (nested_in_vect_loop_p (loop, stmt))
    {
      if (vect_print_dump_info (REPORT_ALIGNMENT))
        fprintf (vect_dump, "strided access in outer loop.");
      return false;
    }

  /* Not consecutive access - check if it's a part of an interleaving group.  */
  return vect_analyze_group_access (dr);
}
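
/* Example (added commentary, hypothetical ref): a ref with DR_STEP equal
   to TYPE_SIZE_UNIT, e.g. a[i] with a 4-byte type and step 4, is
   consecutive and trivially vectorizable; a step of 8 on the same type is
   not consecutive and is handed to vect_analyze_group_access above for
   interleaving analysis.  */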
/* Function vect_analyze_data_ref_accesses.

   Analyze the access pattern of all the data references in the loop.

   FORNOW: the only access pattern that is considered vectorizable is a
   simple step 1 (consecutive) access.

   FORNOW: handle only arrays and pointer accesses.  */

bool
vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
{
  unsigned int i;
  VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");

  for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
    if (!vect_analyze_data_ref_access (dr))
      {
        if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
          fprintf (vect_dump, "not vectorized: complicated access pattern.");
        return false;
      }

  return true;
}
/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Return FALSE if resulting list of ddrs is longer than allowed by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE.  */

bool
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  VEC (ddr_p, heap) *ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  unsigned i, j;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");

  for (i = 0; i < VEC_length (ddr_p, ddrs); )
    {
      bool found;
      ddr_p ddr_i;

      ddr_i = VEC_index (ddr_p, ddrs, i);
      found = false;

      for (j = 0; j < i; j++)
        {
          ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);

          if (vect_vfa_range_equal (ddr_i, ddr_j))
            {
              if (vect_print_dump_info (REPORT_DR_DETAILS))
                {
                  fprintf (vect_dump, "found equal ranges ");
                  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
                  fprintf (vect_dump, ", ");
                  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
                  fprintf (vect_dump, " and ");
                  print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
                  fprintf (vect_dump, ", ");
                  print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
                }
              found = true;
              break;
            }
        }

      if (found)
        {
          VEC_ordered_remove (ddr_p, ddrs, i);
          continue;
        }
      i++;
    }

  if (VEC_length (ddr_p, ddrs) >
       (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
    {
      if (vect_print_dump_info (REPORT_DR_DETAILS))
        {
          fprintf (vect_dump,
                   "disable versioning for alias - max number of generated "
                   "checks exceeded.");
        }

      VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);

      return false;
    }

  return true;
}
1750 /* Function vect_analyze_data_refs.
1752 Find all the data references in the loop.
1754 The general structure of the analysis of data refs in the vectorizer is as
1756 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1757 find and analyze all data-refs in the loop and their dependences.
1758 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1759 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1760 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1765 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1767 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1769 VEC (data_reference_p, heap) *datarefs;
1770 struct data_reference *dr;
1773 if (vect_print_dump_info (REPORT_DETAILS))
1774 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1776 compute_data_dependences_for_loop (loop, true,
1777 &LOOP_VINFO_DATAREFS (loop_vinfo),
1778 &LOOP_VINFO_DDRS (loop_vinfo));
1780 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1781 from stmt_vec_info struct to DR and vectype. */
1782 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1784 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1787 stmt_vec_info stmt_info;
1789 tree base, offset, init;
1791 if (!dr || !DR_REF (dr))
1793 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1794 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1798 stmt = DR_STMT (dr);
1799 stmt_info = vinfo_for_stmt (stmt);
1801 /* Check that analysis of the data-ref succeeded. */
1802 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1805 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1807 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1808 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1813 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1815 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1816 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1821 base = unshare_expr (DR_BASE_ADDRESS (dr));
1822 offset = unshare_expr (DR_OFFSET (dr));
1823 init = unshare_expr (DR_INIT (dr));
1825 /* Update DR field in stmt_vec_info struct. */
1826 bb = gimple_bb (stmt);
1828 /* If the dataref is in an inner-loop of the loop that is considered for
1829 for vectorization, we also want to analyze the access relative to
1830 the outer-loop (DR contains information only relative to the
1831 inner-most enclosing loop). We do that by building a reference to the
1832 first location accessed by the inner-loop, and analyze it relative to
1834 if (nested_in_vect_loop_p (loop, stmt))
1836 tree outer_step, outer_base, outer_init;
1837 HOST_WIDE_INT pbitsize, pbitpos;
1839 enum machine_mode pmode;
1840 int punsignedp, pvolatilep;
1841 affine_iv base_iv, offset_iv;
1844 /* Build a reference to the first location accessed by the
1845 inner-loop: *(BASE+INIT). (The first location is actually
1846 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1847 tree inner_base = build_fold_indirect_ref
1848 (fold_build2 (POINTER_PLUS_EXPR,
1849 TREE_TYPE (base), base,
1850 fold_convert (sizetype, init)));
1852 if (vect_print_dump_info (REPORT_DETAILS))
1854 fprintf (vect_dump, "analyze in outer-loop: ");
1855 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1858 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1859 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1860 gcc_assert (outer_base != NULL_TREE);
1862 if (pbitpos % BITS_PER_UNIT != 0)
1864 if (vect_print_dump_info (REPORT_DETAILS))
1865 fprintf (vect_dump, "failed: bit offset alignment.\n");
1869 outer_base = build_fold_addr_expr (outer_base);
1870 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1873 if (vect_print_dump_info (REPORT_DETAILS))
1874 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1881 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
1889 offset_iv.base = ssize_int (0);
1890 offset_iv.step = ssize_int (0);
1892 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
1895 if (vect_print_dump_info (REPORT_DETAILS))
1896 fprintf (vect_dump, "evolution of offset is not affine.\n");
1900 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
1901 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1902 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
1903 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1904 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
1906 outer_step = size_binop (PLUS_EXPR,
1907 fold_convert (ssizetype, base_iv.step),
1908 fold_convert (ssizetype, offset_iv.step));
1910 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
1911 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
1912 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
1913 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
1914 STMT_VINFO_DR_OFFSET (stmt_info) =
1915 fold_convert (ssizetype, offset_iv.base);
1916 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
1917 size_int (highest_pow2_factor (offset_iv.base));
1919 if (vect_print_dump_info (REPORT_DETAILS))
1921 fprintf (vect_dump, "\touter base_address: ");
1922 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
1923 fprintf (vect_dump, "\n\touter offset from base address: ");
1924 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
1925 fprintf (vect_dump, "\n\touter constant offset from base address: ");
1926 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
1927 fprintf (vect_dump, "\n\touter step: ");
1928 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
1929 fprintf (vect_dump, "\n\touter aligned to: ");
1930 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
1934 if (STMT_VINFO_DATA_REF (stmt_info))
1936 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1939 "not vectorized: more than one data ref in stmt: ");
1940 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1942 return false;
1945 STMT_VINFO_DATA_REF (stmt_info) = dr;
1947 /* Set vectype for STMT. */
1948 scalar_type = TREE_TYPE (DR_REF (dr));
1949 STMT_VINFO_VECTYPE (stmt_info) =
1950 get_vectype_for_scalar_type (scalar_type);
1951 if (!STMT_VINFO_VECTYPE (stmt_info))
1953 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1956 "not vectorized: no vectype for stmt: ");
1957 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1958 fprintf (vect_dump, " scalar_type: ");
1959 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1961 return false;
1965 return true;
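/* Editor's note: an illustrative, standalone sketch of the outer-loop-relative
   description computed above (base + init + i*outer_step, with the offset
   handled separately).  This is a plain-C model, not vectorizer API; all
   names below are assumptions for illustration only, and the block is kept
   out of any build with #if 0.  */
#if 0
#include <stdio.h>

int
main (void)
{
  short in[64];
  /* Model of the recorded outer-loop description of &in[i+j]:
     base = (char *) in, init = 0, and both the outer (i) and inner (j)
     steps equal sizeof (short).  */
  char *base = (char *) in;
  long init = 0, outer_step = sizeof (short), inner_step = sizeof (short);

  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 4; j++)
      {
        char *analyzed = base + init + i * outer_step + j * inner_step;
        if (analyzed != (char *) &in[i + j])
          printf ("mismatch at i=%d j=%d\n", i, j);  /* never triggers */
      }
  return 0;
}
#endif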
1969 /* Function vect_get_new_vect_var.
1971 Returns a name for a new variable. The current naming scheme appends the
1972 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1973 the name of vectorizer generated variables, and appends that to NAME if
1974 provided. */
1977 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1979 const char *prefix;
1980 tree new_vect_var;
1982 switch (var_kind)
1984 case vect_simple_var:
1985 prefix = "vect_";
1986 break;
1987 case vect_scalar_var:
1988 prefix = "stmp_";
1989 break;
1990 case vect_pointer_var:
1991 prefix = "vect_p";
1992 break;
1993 default:
1994 gcc_unreachable ();
1997 if (name)
1999 char* tmp = concat (prefix, name, NULL);
2000 new_vect_var = create_tmp_var (type, tmp);
2001 free (tmp);
2003 else
2004 new_vect_var = create_tmp_var (type, prefix);
2006 /* Mark vector typed variable as a gimple register variable. */
2007 if (TREE_CODE (type) == VECTOR_TYPE)
2008 DECL_GIMPLE_REG_P (new_vect_var) = true;
2010 return new_vect_var;
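/* Editor's note: a minimal standalone model of the naming scheme described
   above ("vect_", "stmp_" or "vect_p" prepended to NAME when NAME is given).
   It is a sketch, not GCC code; the enum and helper below are hypothetical,
   and the block is excluded from any build with #if 0.  */
#if 0
#include <stdio.h>
#include <string.h>

enum model_var_kind { SIMPLE_VAR, SCALAR_VAR, POINTER_VAR };

static void
model_vect_var_name (enum model_var_kind kind, const char *name, char *buf)
{
  const char *prefix = (kind == POINTER_VAR ? "vect_p"
                        : kind == SCALAR_VAR ? "stmp_" : "vect_");
  strcpy (buf, prefix);
  if (name)
    strcat (buf, name);       /* prefix is used alone when NAME is absent */
}

int
main (void)
{
  char buf[32];
  model_vect_var_name (POINTER_VAR, "x", buf);
  printf ("%s\n", buf);       /* prints vect_px */
  return 0;
}
#endif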
2014 /* Function vect_create_addr_base_for_vector_ref.
2016 Create an expression that computes the address of the first memory location
2017 that will be accessed for a data reference.
2019 Input:
2020 STMT: The statement containing the data reference.
2021 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2022 OFFSET: Optional. If supplied, it is added to the initial address.
2023 LOOP: Specify relative to which loop-nest should the address be computed.
2024 For example, when the dataref is in an inner-loop nested in an
2025 outer-loop that is now being vectorized, LOOP can be either the
2026 outer-loop, or the inner-loop. The first memory location accessed
2027 by the following dataref ('in' points to short):
2029 for (i=0; i<N; i++)
2030 for (j=0; j<M; j++)
2031 s += in[i+j];
2033 is as follows:
2034 if LOOP=i_loop: &in (relative to i_loop)
2035 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2037 Output:
2038 1. Return an SSA_NAME whose value is the address of the memory location of
2039 the first vector of the data reference.
2040 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2041 these statement(s) which define the returned SSA_NAME.
2043 FORNOW: We are only handling array accesses with step 1. */
2046 vect_create_addr_base_for_vector_ref (gimple stmt,
2047 gimple_seq *new_stmt_list,
2048 tree offset,
2049 struct loop *loop)
2051 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2052 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2053 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2054 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2055 tree base_name;
2056 tree data_ref_base_var;
2057 tree vec_stmt;
2058 tree addr_base, addr_expr;
2059 tree dest;
2060 gimple_seq seq = NULL;
2061 tree base_offset = unshare_expr (DR_OFFSET (dr));
2062 tree init = unshare_expr (DR_INIT (dr));
2063 tree vect_ptr_type;
2064 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2067 if (loop != containing_loop)
2069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2070 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2072 gcc_assert (nested_in_vect_loop_p (loop, stmt));
2074 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2075 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2076 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2079 /* Create data_ref_base */
2080 base_name = build_fold_indirect_ref (data_ref_base);
2081 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2082 add_referenced_var (data_ref_base_var);
2083 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2084 data_ref_base_var);
2085 gimple_seq_add_seq (new_stmt_list, seq);
2087 /* Create base_offset */
2088 base_offset = size_binop (PLUS_EXPR,
2089 fold_convert (sizetype, base_offset),
2090 fold_convert (sizetype, init));
2091 dest = create_tmp_var (sizetype, "base_off");
2092 add_referenced_var (dest);
2093 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2094 gimple_seq_add_seq (new_stmt_list, seq);
2096 if (offset)
2098 tree tmp = create_tmp_var (sizetype, "offset");
2100 add_referenced_var (tmp);
2101 offset = fold_build2 (MULT_EXPR, sizetype,
2102 fold_convert (sizetype, offset), step);
2103 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2104 base_offset, offset);
2105 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2106 gimple_seq_add_seq (new_stmt_list, seq);
2109 /* base + base_offset */
2110 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2111 data_ref_base, base_offset);
2113 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2115 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2116 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2117 get_name (base_name));
2119 add_referenced_var (addr_expr);
2120 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2121 gimple_seq_add_seq (new_stmt_list, seq);
2123 if (vect_print_dump_info (REPORT_DETAILS))
2125 fprintf (vect_dump, "created ");
2126 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2129 return vec_stmt;
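/* Editor's note: a standalone sketch of the address arithmetic assembled
   above: addr_base = DR_BASE_ADDRESS + (DR_OFFSET + DR_INIT) + OFFSET * step,
   with OFFSET counted in elements and step the element size.  Plain-C model
   with assumed example values only; kept out of the build with #if 0.  */
#if 0
#include <stdio.h>

int
main (void)
{
  short a[32];
  char *dr_base = (char *) a;          /* DR_BASE_ADDRESS          */
  long dr_offset = 0;                  /* DR_OFFSET                */
  long dr_init = 4 * sizeof (short);   /* DR_INIT: points at &a[4] */
  long offset_in_elems = 3;            /* the OFFSET argument      */
  long step = sizeof (short);          /* TYPE_SIZE_UNIT of elem   */

  char *addr_base = dr_base + dr_offset + dr_init + offset_in_elems * step;
  printf ("%d\n", addr_base == (char *) &a[7]);   /* prints 1 */
  return 0;
}
#endif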
2133 /* Function vect_create_data_ref_ptr.
2135 Create a new pointer to vector type (vp), that points to the first location
2136 accessed in the loop by STMT, along with the def-use update chain to
2137 appropriately advance the pointer through the loop iterations. Also set
2138 aliasing information for the pointer. This vector pointer is used by the
2139 callers to this function to create a memory reference expression for vector
2140 load/store access.
2142 Input:
2143 1. STMT: a stmt that references memory. Expected to be of the form
2144 GIMPLE_ASSIGN <name, data-ref> or
2145 GIMPLE_ASSIGN <data-ref, name>.
2146 2. AT_LOOP: the loop where the vector memref is to be created.
2147 3. OFFSET (optional): an offset to be added to the initial address accessed
2148 by the data-ref in STMT.
2149 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2150 pointing to the initial address.
2151 5. TYPE: if not NULL indicates the required type of the data-ref.
2153 Output:
2154 1. Declare a new ptr to vector_type, and have it point to the base of the
2155 data reference (initial address accessed by the data reference).
2156 For example, for vector of type V8HI, the following code is generated:
2158 v8hi *vp;
2159 vp = (v8hi *)initial_address;
2161 if OFFSET is not supplied:
2162 initial_address = &a[init];
2163 if OFFSET is supplied:
2164 initial_address = &a[init + OFFSET];
2166 Return the initial_address in INITIAL_ADDRESS.
2168 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2169 update the pointer in each iteration of the loop.
2171 Return the increment stmt that updates the pointer in PTR_INCR.
2173 3. Set INV_P to true if the access pattern of the data reference in the
2174 vectorized loop is invariant. Set it to false otherwise.
2176 4. Return the pointer. */
2179 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2180 tree offset, tree *initial_address, gimple *ptr_incr,
2181 bool only_init, bool *inv_p)
2183 tree base_name;
2184 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2185 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2186 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2187 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2188 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2189 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2190 tree vect_ptr_type;
2191 tree vect_ptr;
2192 tree new_temp;
2193 gimple vec_stmt;
2194 gimple_seq new_stmt_list = NULL;
2195 edge pe;
2196 basic_block new_bb;
2197 tree vect_ptr_init;
2198 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2199 tree vptr;
2200 gimple_stmt_iterator incr_gsi;
2201 bool insert_after;
2202 tree indx_before_incr, indx_after_incr;
2203 gimple incr;
2204 tree step;
2206 /* Check the step (evolution) of the load in LOOP, and record
2207 whether it's invariant. */
2208 if (nested_in_vect_loop)
2209 step = STMT_VINFO_DR_STEP (stmt_info);
2210 else
2211 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2213 if (tree_int_cst_compare (step, size_zero_node) == 0)
2214 *inv_p = true;
2215 else
2216 *inv_p = false;
2218 /* Create an expression for the first address accessed by this load
2219 in LOOP. */
2220 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2222 if (vect_print_dump_info (REPORT_DETAILS))
2224 tree data_ref_base = base_name;
2225 fprintf (vect_dump, "create vector-pointer variable to type: ");
2226 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2227 if (TREE_CODE (data_ref_base) == VAR_DECL)
2228 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
2229 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2230 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
2231 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2232 fprintf (vect_dump, " vectorizing a record based array ref: ");
2233 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2234 fprintf (vect_dump, " vectorizing a pointer ref: ");
2235 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2238 /** (1) Create the new vector-pointer variable: **/
2239 vect_ptr_type = build_pointer_type (vectype);
2240 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2241 get_name (base_name));
2242 /* If any of the data-references in the stmt group does not conflict
2243 with the created vector data-reference use a ref-all pointer instead. */
2244 if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2246 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2247 do
2249 tree lhs = gimple_assign_lhs (orig_stmt);
2250 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2251 get_alias_set (lhs)))
2253 vect_ptr_type = build_pointer_type_for_mode (vectype,
2254 ptr_mode, true);
2255 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2256 get_name (base_name));
2257 break;
2260 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2262 while (orig_stmt);
2265 add_referenced_var (vect_ptr);
2267 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2268 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2269 def-use update cycles for the pointer: One relative to the outer-loop
2270 (LOOP), which is what steps (3) and (4) below do. The other is relative
2271 to the inner-loop (which is the inner-most loop containing the dataref),
2272 and this is done by step (5) below.
2274 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2275 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2276 redundant. Steps (3),(4) create the following:
2278 vp0 = &base_addr;
2279 LOOP: vp1 = phi(vp0,vp2)
2280 ...
2282 vp2 = vp1 + step
2283 goto LOOP
2285 If there is an inner-loop nested in loop, then step (5) will also be
2286 applied, and an additional update in the inner-loop will be created:
2288 vp0 = &base_addr;
2289 LOOP: vp1 = phi(vp0,vp2)
2290 ...
2291 inner: vp3 = phi(vp1,vp4)
2292 vp4 = vp3 + inner_step
2293 if () goto inner
2295 vp2 = vp1 + step
2296 if () goto LOOP */
2298 /** (3) Calculate the initial address of the vector-pointer, and set
2299 the vector-pointer to point to it before the loop: **/
2301 /* Create: &(base[init_val+offset]) in the loop preheader. */
2303 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2304 offset, loop);
2305 pe = loop_preheader_edge (loop);
2306 if (new_stmt_list)
2308 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2309 gcc_assert (!new_bb);
2312 *initial_address = new_temp;
2314 /* Create: p = (vectype *) initial_base */
2315 vec_stmt = gimple_build_assign (vect_ptr,
2316 fold_convert (vect_ptr_type, new_temp));
2317 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2318 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2319 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2320 gcc_assert (!new_bb);
2323 /** (4) Handle the updating of the vector-pointer inside the loop.
2324 This is needed when ONLY_INIT is false, and also when AT_LOOP
2325 is the inner-loop nested in LOOP (during outer-loop vectorization).
2326 **/
2328 if (only_init && at_loop == loop) /* No update in loop is required. */
2330 /* Copy the points-to information if it exists. */
2331 if (DR_PTR_INFO (dr))
2332 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2333 vptr = vect_ptr_init;
2335 else
2337 /* The step of the vector pointer is the Vector Size. */
2338 tree step = TYPE_SIZE_UNIT (vectype);
2339 /* One exception to the above is when the scalar step of the load in
2340 LOOP is zero. In this case the step here is also zero. */
2341 if (*inv_p)
2342 step = size_zero_node;
2344 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2346 create_iv (vect_ptr_init,
2347 fold_convert (vect_ptr_type, step),
2348 vect_ptr, loop, &incr_gsi, insert_after,
2349 &indx_before_incr, &indx_after_incr);
2350 incr = gsi_stmt (incr_gsi);
2351 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
2353 /* Copy the points-to information if it exists. */
2354 if (DR_PTR_INFO (dr))
2356 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2357 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2359 merge_alias_info (vect_ptr_init, indx_before_incr);
2360 merge_alias_info (vect_ptr_init, indx_after_incr);
2364 vptr = indx_before_incr;
2367 if (!nested_in_vect_loop || only_init)
2368 return vptr;
2371 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2372 nested in LOOP, if it exists: **/
2374 gcc_assert (nested_in_vect_loop);
2377 standard_iv_increment_position (containing_loop, &incr_gsi,
2378 &insert_after);
2379 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2380 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2381 &indx_after_incr);
2382 incr = gsi_stmt (incr_gsi);
2383 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
2385 /* Copy the points-to information if it exists. */
2386 if (DR_PTR_INFO (dr))
2388 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2389 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2391 merge_alias_info (vect_ptr_init, indx_before_incr);
2392 merge_alias_info (vect_ptr_init, indx_after_incr);
2396 return indx_before_incr;
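/* Editor's note: a standalone scalar model of the two def-use update cycles
   created above for the nested case - the outer pointer advances by the
   outer step (step (4)), the inner pointer by DR_STEP (step (5)).  An
   illustrative sketch with assumed sizes, not generated code; guarded
   with #if 0.  */
#if 0
#include <stdio.h>

int
main (void)
{
  float data[4][4], sum = 0.f;
  for (int i = 0; i < 16; i++)
    ((float *) data)[i] = (float) i;

  float *vp1 = &data[0][0];       /* vp1 = phi (vp0, vp2): outer cycle */
  for (int i = 0; i < 4; i++)
    {
      float *vp3 = vp1;           /* vp3 = phi (vp1, vp4): inner cycle */
      for (int j = 0; j < 4; j++)
        {
          sum += *vp3;
          vp3 += 1;               /* vp4 = vp3 + inner_step */
        }
      vp1 += 4;                   /* vp2 = vp1 + step       */
    }
  printf ("%g\n", sum);           /* prints 120 */
  return 0;
}
#endif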
2403 /* Function bump_vector_ptr
2405 Increment a pointer (to a vector type) by vector-size. If requested,
2406 i.e. if PTR-INCR is given, then also connect the new increment stmt
2407 to the existing def-use update-chain of the pointer, by modifying
2408 the PTR_INCR as illustrated below:
2410 The pointer def-use update-chain before this function:
2411 DATAREF_PTR = phi (p_0, p_2)
2413 PTR_INCR: p_2 = DATAREF_PTR + step
2415 The pointer def-use update-chain after this function:
2416 DATAREF_PTR = phi (p_0, p_2)
2418 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2420 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2422 Input:
2423 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2424 in the loop.
2425 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2426 the loop. The increment amount across iterations is expected
2427 to be vector_size.
2428 BSI - location where the new update stmt is to be placed.
2429 STMT - the original scalar memory-access stmt that is being vectorized.
2430 BUMP - optional. The offset by which to bump the pointer. If not given,
2431 the offset is assumed to be vector_size.
2433 Output: Return NEW_DATAREF_PTR as illustrated above. */
2438 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2439 gimple stmt, tree bump)
2441 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2442 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2443 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2444 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2445 tree update = TYPE_SIZE_UNIT (vectype);
2446 gimple incr_stmt;
2447 ssa_op_iter iter;
2448 use_operand_p use_p;
2449 tree new_dataref_ptr;
2451 if (bump)
2452 update = bump;
2454 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2455 dataref_ptr, update);
2456 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2457 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2458 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2460 /* Copy the points-to information if it exists. */
2461 if (DR_PTR_INFO (dr))
2462 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2463 merge_alias_info (new_dataref_ptr, dataref_ptr);
2465 if (!ptr_incr)
2466 return new_dataref_ptr;
2468 /* Update the vector-pointer's cross-iteration increment. */
2469 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2471 tree use = USE_FROM_PTR (use_p);
2473 if (use == dataref_ptr)
2474 SET_USE (use_p, new_dataref_ptr);
2475 else
2476 gcc_assert (tree_int_cst_compare (use, update) == 0);
2479 return new_dataref_ptr;
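/* Editor's note: a standalone sketch of the update-chain rewiring pictured
   in the comment above: NEW_DATAREF_PTR = DATAREF_PTR + BUMP, and the
   pre-existing increment now starts from NEW_DATAREF_PTR.  Plain C with
   assumed step/bump values, not SSA manipulation; guarded with #if 0.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int a[8] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int *dataref_ptr = a;                /* DATAREF_PTR = phi (p_0, p_2) */
  int bump = 2;                        /* BUMP, e.g. half a vector     */
  int step = 4;                        /* cross-iteration step         */

  int *new_dataref_ptr = dataref_ptr + bump;  /* NEW = DATAREF_PTR + BUMP */
  int *p_2 = new_dataref_ptr + step;          /* PTR_INCR now uses NEW    */

  printf ("%d %d %d\n", *dataref_ptr, *new_dataref_ptr, *p_2);  /* 0 2 6 */
  return 0;
}
#endif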
2483 /* Function vect_create_destination_var.
2485 Create a new temporary of type VECTYPE. */
2488 vect_create_destination_var (tree scalar_dest, tree vectype)
2490 tree vec_dest;
2491 const char *new_name;
2492 tree type;
2493 enum vect_var_kind kind;
2495 kind = vectype ? vect_simple_var : vect_scalar_var;
2496 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2498 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2500 new_name = get_name (scalar_dest);
2501 if (!new_name)
2502 new_name = "var_";
2503 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2504 add_referenced_var (vec_dest);
2506 return vec_dest;
2509 /* Function vect_strided_store_supported.
2511 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2512 and FALSE otherwise. */
2515 vect_strided_store_supported (tree vectype)
2517 optab interleave_high_optab, interleave_low_optab;
2518 int mode;
2520 mode = (int) TYPE_MODE (vectype);
2522 /* Check that the operation is supported. */
2523 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2524 vectype, optab_default);
2525 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2526 vectype, optab_default);
2527 if (!interleave_high_optab || !interleave_low_optab)
2529 if (vect_print_dump_info (REPORT_DETAILS))
2530 fprintf (vect_dump, "no optab for interleave.");
2531 return false;
2534 if (optab_handler (interleave_high_optab, mode)->insn_code
2535 == CODE_FOR_nothing
2536 || optab_handler (interleave_low_optab, mode)->insn_code
2537 == CODE_FOR_nothing)
2539 if (vect_print_dump_info (REPORT_DETAILS))
2540 fprintf (vect_dump, "interleave op not supported by target.");
2541 return false;
2544 return true;
2548 /* Function vect_permute_store_chain.
2550 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2551 a power of 2, generate interleave_high/low stmts to reorder the data
2552 correctly for the stores. Return the final references for stores in
2553 RESULT_CHAIN.
2555 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2556 The input is 4 vectors each containing 8 elements. We assign a number to each
2557 element; the input sequence is:
2559 1st vec: 0 1 2 3 4 5 6 7
2560 2nd vec: 8 9 10 11 12 13 14 15
2561 3rd vec: 16 17 18 19 20 21 22 23
2562 4th vec: 24 25 26 27 28 29 30 31
2564 The output sequence should be:
2566 1st vec: 0 8 16 24 1 9 17 25
2567 2nd vec: 2 10 18 26 3 11 19 27
2568 3rd vec: 4 12 20 28 5 13 21 29
2569 4th vec: 6 14 22 30 7 15 23 31
2571 i.e., we interleave the contents of the four vectors in their order.
2573 We use interleave_high/low instructions to create such output. The input of
2574 each interleave_high/low operation is two vectors:
2575 1st vec 2nd vec
2576 0 1 2 3 4 5 6 7
2577 the even elements of the result vector are obtained left-to-right from the
2578 high/low elements of the first vector. The odd elements of the result are
2579 obtained left-to-right from the high/low elements of the second vector.
2580 The output of interleave_high will be: 0 4 1 5
2581 and of interleave_low: 2 6 3 7
2584 The permutation is done in log LENGTH stages. In each stage interleave_high
2585 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2586 where the first argument is taken from the first half of DR_CHAIN and the
2587 second argument from its second half.
2589 In our example,
2590 I1: interleave_high (1st vec, 3rd vec)
2591 I2: interleave_low (1st vec, 3rd vec)
2592 I3: interleave_high (2nd vec, 4th vec)
2593 I4: interleave_low (2nd vec, 4th vec)
2595 The output for the first stage is:
2597 I1: 0 16 1 17 2 18 3 19
2598 I2: 4 20 5 21 6 22 7 23
2599 I3: 8 24 9 25 10 26 11 27
2600 I4: 12 28 13 29 14 30 15 31
2602 The output of the second stage, i.e. the final result is:
2604 I1: 0 8 16 24 1 9 17 25
2605 I2: 2 10 18 26 3 11 19 27
2606 I3: 4 12 20 28 5 13 21 29
2607 I4: 6 14 22 30 7 15 23 31. */
2610 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2611 unsigned int length,
2612 gimple stmt,
2613 gimple_stmt_iterator *gsi,
2614 VEC(tree,heap) **result_chain)
2616 tree perm_dest, vect1, vect2, high, low;
2617 gimple perm_stmt;
2618 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2619 tree scalar_dest;
2620 int i;
2621 unsigned int j;
2622 enum tree_code high_code, low_code;
2624 scalar_dest = gimple_assign_lhs (stmt);
2626 /* Check that the operation is supported. */
2627 if (!vect_strided_store_supported (vectype))
2628 return false;
2630 *result_chain = VEC_copy (tree, heap, dr_chain);
2632 for (i = 0; i < exact_log2 (length); i++)
2634 for (j = 0; j < length/2; j++)
2636 vect1 = VEC_index (tree, dr_chain, j);
2637 vect2 = VEC_index (tree, dr_chain, j+length/2);
2639 /* Create interleaving stmt:
2640 in the case of big endian:
2641 high = interleave_high (vect1, vect2)
2642 and in the case of little endian:
2643 high = interleave_low (vect1, vect2). */
2644 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2645 DECL_GIMPLE_REG_P (perm_dest) = 1;
2646 add_referenced_var (perm_dest);
2647 if (BYTES_BIG_ENDIAN)
2649 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2650 low_code = VEC_INTERLEAVE_LOW_EXPR;
2652 else
2654 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2655 high_code = VEC_INTERLEAVE_LOW_EXPR;
2657 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2658 vect1, vect2);
2659 high = make_ssa_name (perm_dest, perm_stmt);
2660 gimple_assign_set_lhs (perm_stmt, high);
2661 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2662 VEC_replace (tree, *result_chain, 2*j, high);
2664 /* Create interleaving stmt:
2665 in the case of big endian:
2666 low = interleave_low (vect1, vect2)
2667 and in the case of little endian:
2668 low = interleave_high (vect1, vect2). */
2669 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2670 DECL_GIMPLE_REG_P (perm_dest) = 1;
2671 add_referenced_var (perm_dest);
2672 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2673 vect1, vect2);
2674 low = make_ssa_name (perm_dest, perm_stmt);
2675 gimple_assign_set_lhs (perm_stmt, low);
2676 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2677 VEC_replace (tree, *result_chain, 2*j+1, low);
2679 dr_chain = VEC_copy (tree, heap, *result_chain);
2682 return true;
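/* Editor's note: the interleaving documented above can be checked with this
   standalone scalar model of the log2(LENGTH)-stage permutation (LENGTH = 4
   vectors of 8 elements).  Vectors are modeled as plain arrays, and
   interleave_high/low as the element shuffles used on little-endian targets;
   illustrative only, guarded with #if 0.  */
#if 0
#include <stdio.h>
#include <string.h>

#define VF 8
#define LEN 4

static void
interleave_high (const int *v1, const int *v2, int *out)
{
  for (int k = 0; k < VF / 2; k++)   /* interleave the first halves  */
    {
      out[2 * k] = v1[k];
      out[2 * k + 1] = v2[k];
    }
}

static void
interleave_low (const int *v1, const int *v2, int *out)
{
  for (int k = 0; k < VF / 2; k++)   /* interleave the second halves */
    {
      out[2 * k] = v1[VF / 2 + k];
      out[2 * k + 1] = v2[VF / 2 + k];
    }
}

int
main (void)
{
  int chain[LEN][VF], result[LEN][VF];
  for (int i = 0; i < LEN * VF; i++)
    chain[i / VF][i % VF] = i;              /* 1st vec: 0..7, etc. */

  for (int stage = 0; stage < 2; stage++)   /* log2 (LEN) stages   */
    {
      for (int j = 0; j < LEN / 2; j++)
        {
          interleave_high (chain[j], chain[j + LEN / 2], result[2 * j]);
          interleave_low (chain[j], chain[j + LEN / 2], result[2 * j + 1]);
        }
      memcpy (chain, result, sizeof chain);
    }

  for (int j = 0; j < LEN; j++)             /* 1st row: 0 8 16 24 1 9 17 25 */
    {
      for (int k = 0; k < VF; k++)
        printf ("%d ", chain[j][k]);
      printf ("\n");
    }
  return 0;
}
#endif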
2684 /* Function vect_setup_realignment
2686 This function is called when vectorizing an unaligned load using
2687 the dr_explicit_realign[_optimized] scheme.
2688 This function generates the following code at the loop prolog:
2690 p = initial_addr;
2691 x msq_init = *(floor(p)); # prolog load
2692 realignment_token = call target_builtin;
2693 loop:
2694 x msq = phi (msq_init, ---)
2696 The stmts marked with x are generated only for the case of
2697 dr_explicit_realign_optimized.
2699 The code above sets up a new (vector) pointer, pointing to the first
2700 location accessed by STMT, and a "floor-aligned" load using that pointer.
2701 It also generates code to compute the "realignment-token" (if the relevant
2702 target hook was defined), and creates a phi-node at the loop-header bb
2703 whose arguments are the result of the prolog-load (created by this
2704 function) and the result of a load that takes place in the loop (to be
2705 created by the caller to this function).
2707 For the case of dr_explicit_realign_optimized:
2708 The caller to this function uses the phi-result (msq) to create the
2709 realignment code inside the loop, and sets up the missing phi argument,
2710 as follows:
2711 loop:
2712 msq = phi (msq_init, lsq)
2713 lsq = *(floor(p')); # load in loop
2714 result = realign_load (msq, lsq, realignment_token);
2716 For the case of dr_explicit_realign:
2717 loop:
2718 msq = *(floor(p)); # load in loop
2719 p' = p + (VS-1);
2720 lsq = *(floor(p')); # load in loop
2721 result = realign_load (msq, lsq, realignment_token);
2723 Input:
2724 STMT - (scalar) load stmt to be vectorized. This load accesses
2725 a memory location that may be unaligned.
2726 BSI - place where new code is to be inserted.
2727 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2728 is used.
2730 Output:
2731 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2732 target hook, if defined.
2733 Return value - the result of the loop-header phi node. */
2736 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2737 tree *realignment_token,
2738 enum dr_alignment_support alignment_support_scheme,
2739 tree init_addr,
2740 struct loop **at_loop)
2742 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2743 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2744 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2745 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2746 edge pe;
2747 tree scalar_dest = gimple_assign_lhs (stmt);
2748 tree vec_dest;
2749 gimple inc;
2750 tree ptr;
2751 tree data_ref;
2752 gimple new_stmt;
2753 basic_block new_bb;
2754 tree msq_init = NULL_TREE;
2755 tree new_temp;
2756 gimple phi_stmt;
2757 tree msq = NULL_TREE;
2758 gimple_seq stmts = NULL;
2759 bool inv_p;
2760 bool compute_in_loop = false;
2761 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2762 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2763 struct loop *loop_for_initial_load;
2765 gcc_assert (alignment_support_scheme == dr_explicit_realign
2766 || alignment_support_scheme == dr_explicit_realign_optimized);
2768 /* We need to generate three things:
2769 1. the misalignment computation
2770 2. the extra vector load (for the optimized realignment scheme).
2771 3. the phi node for the two vectors from which the realignment is
2772 done (for the optimized realignment scheme).
2773 */
2775 /* 1. Determine where to generate the misalignment computation.
2777 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2778 calculation will be generated by this function, outside the loop (in the
2779 preheader). Otherwise, INIT_ADDR has already been computed for us by the
2780 caller, inside the loop.
2782 Background: If the misalignment remains fixed throughout the iterations of
2783 the loop, then both realignment schemes are applicable, and also the
2784 misalignment computation can be done outside LOOP. This is because we are
2785 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2786 are a multiple of VS (the Vector Size), and therefore the misalignment in
2787 different vectorized LOOP iterations is always the same.
2788 The problem arises only if the memory access is in an inner-loop nested
2789 inside LOOP, which is now being vectorized using outer-loop vectorization.
2790 This is the only case when the misalignment of the memory access may not
2791 remain fixed throughout the iterations of the inner-loop (as explained in
2792 detail in vect_supportable_dr_alignment). In this case, not only is the
2793 optimized realignment scheme not applicable, but also the misalignment
2794 computation (and generation of the realignment token that is passed to
2795 REALIGN_LOAD) has to be done inside the loop.
2797 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2798 or not, which in turn determines if the misalignment is computed inside
2799 the inner-loop, or outside LOOP. */
2801 if (init_addr != NULL_TREE)
2803 compute_in_loop = true;
2804 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2808 /* 2. Determine where to generate the extra vector load.
2810 For the optimized realignment scheme, instead of generating two vector
2811 loads in each iteration, we generate a single extra vector load in the
2812 preheader of the loop, and in each iteration reuse the result of the
2813 vector load from the previous iteration. In case the memory access is in
2814 an inner-loop nested inside LOOP, which is now being vectorized using
2815 outer-loop vectorization, we need to determine whether this initial vector
2816 load should be generated at the preheader of the inner-loop, or can be
2817 generated at the preheader of LOOP. If the memory access has no evolution
2818 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2819 to be generated inside LOOP (in the preheader of the inner-loop). */
2821 if (nested_in_vect_loop)
2823 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2824 bool invariant_in_outerloop =
2825 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2826 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2828 else
2829 loop_for_initial_load = loop;
2830 if (at_loop)
2831 *at_loop = loop_for_initial_load;
2833 /* 3. For the case of the optimized realignment, create the first vector
2834 load at the loop preheader. */
2836 if (alignment_support_scheme == dr_explicit_realign_optimized)
2838 /* Create msq_init = *(floor(p1)) in the loop preheader */
2840 gcc_assert (!compute_in_loop);
2841 pe = loop_preheader_edge (loop_for_initial_load);
2842 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2843 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
2844 &init_addr, &inc, true, &inv_p);
2845 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2846 new_stmt = gimple_build_assign (vec_dest, data_ref);
2847 new_temp = make_ssa_name (vec_dest, new_stmt);
2848 gimple_assign_set_lhs (new_stmt, new_temp);
2849 mark_symbols_for_renaming (new_stmt);
2850 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2851 gcc_assert (!new_bb);
2852 msq_init = gimple_assign_lhs (new_stmt);
2855 /* 4. Create realignment token using a target builtin, if available.
2856 It is done either inside the containing loop, or before LOOP (as
2857 determined above). */
2859 if (targetm.vectorize.builtin_mask_for_load)
2861 tree builtin_decl;
2863 /* Compute INIT_ADDR - the initial address accessed by this memref. */
2864 if (compute_in_loop)
2865 gcc_assert (init_addr); /* already computed by the caller. */
2866 else
2868 /* Generate the INIT_ADDR computation outside LOOP. */
2869 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
2870 NULL_TREE, loop);
2871 pe = loop_preheader_edge (loop);
2872 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2873 gcc_assert (!new_bb);
2876 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2877 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
2878 vec_dest =
2879 vect_create_destination_var (scalar_dest,
2880 gimple_call_return_type (new_stmt));
2881 new_temp = make_ssa_name (vec_dest, new_stmt);
2882 gimple_call_set_lhs (new_stmt, new_temp);
2884 if (compute_in_loop)
2885 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2886 else
2888 /* Generate the misalignment computation outside LOOP. */
2889 pe = loop_preheader_edge (loop);
2890 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2891 gcc_assert (!new_bb);
2894 *realignment_token = gimple_call_lhs (new_stmt);
2896 /* The result of the CALL_EXPR to this builtin is determined from
2897 the value of the parameter and no global variables are touched
2898 which makes the builtin a "const" function. Requiring the
2899 builtin to have the "const" attribute makes it unnecessary
2900 to call mark_call_clobbered. */
2901 gcc_assert (TREE_READONLY (builtin_decl));
2904 if (alignment_support_scheme == dr_explicit_realign)
2905 return msq;
2907 gcc_assert (!compute_in_loop);
2908 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
2911 /* 5. Create msq = phi <msq_init, lsq> in loop */
2913 pe = loop_preheader_edge (containing_loop);
2914 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2915 msq = make_ssa_name (vec_dest, NULL);
2916 phi_stmt = create_phi_node (msq, containing_loop->header);
2917 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2918 add_phi_arg (phi_stmt, msq_init, pe);
2920 return msq;
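/* Editor's note: a standalone sketch of the realignment scheme set up above:
   two aligned loads (msq from floor(p), lsq from the next aligned chunk)
   are combined by a modeled realign_load that selects VS consecutive
   elements starting at the misalignment RT.  Illustrative plain C with an
   element-granular RT assumed; guarded with #if 0.  */
#if 0
#include <stdio.h>

#define VS 4   /* elements per vector */

static void
realign_load (const int *msq, const int *lsq, int rt, int *out)
{
  for (int k = 0; k < VS; k++)   /* concat (msq, lsq)[rt + k] */
    out[k] = (rt + k < VS) ? msq[rt + k] : lsq[rt + k - VS];
}

int
main (void)
{
  int a[12];
  for (int i = 0; i < 12; i++)
    a[i] = i;

  int rt = 1;                    /* p = &a[1]: misaligned by one element */
  const int *p_floor = &a[0];    /* floor (p): aligned address below p   */
  int msq[VS], lsq[VS], out[VS];

  for (int k = 0; k < VS; k++)
    {
      msq[k] = p_floor[k];       /* msq_init = *(floor (p))  */
      lsq[k] = p_floor[VS + k];  /* lsq = *(floor (p) + VS)  */
    }

  realign_load (msq, lsq, rt, out);
  for (int k = 0; k < VS; k++)
    printf ("%d ", out[k]);      /* prints 1 2 3 4 */
  printf ("\n");
  return 0;
}
#endif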
2924 /* Function vect_strided_load_supported.
2926 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
2927 and FALSE otherwise. */
2930 vect_strided_load_supported (tree vectype)
2932 optab perm_even_optab, perm_odd_optab;
2933 int mode;
2935 mode = (int) TYPE_MODE (vectype);
2937 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
2938 optab_default);
2939 if (!perm_even_optab)
2941 if (vect_print_dump_info (REPORT_DETAILS))
2942 fprintf (vect_dump, "no optab for perm_even.");
2943 return false;
2946 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
2948 if (vect_print_dump_info (REPORT_DETAILS))
2949 fprintf (vect_dump, "perm_even op not supported by target.");
2950 return false;
2953 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
2954 optab_default);
2955 if (!perm_odd_optab)
2957 if (vect_print_dump_info (REPORT_DETAILS))
2958 fprintf (vect_dump, "no optab for perm_odd.");
2959 return false;
2962 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
2964 if (vect_print_dump_info (REPORT_DETAILS))
2965 fprintf (vect_dump, "perm_odd op not supported by target.");
2966 return false;
2969 return true;
2972 /* Function vect_permute_load_chain.
2974 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
2975 a power of 2, generate extract_even/odd stmts to reorder the input data
2976 correctly. Return the final references for loads in RESULT_CHAIN.
2978 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2979 The input is 4 vectors each containing 8 elements. We assign a number to each
2980 element; the input sequence is:
2982 1st vec: 0 1 2 3 4 5 6 7
2983 2nd vec: 8 9 10 11 12 13 14 15
2984 3rd vec: 16 17 18 19 20 21 22 23
2985 4th vec: 24 25 26 27 28 29 30 31
2987 The output sequence should be:
2989 1st vec: 0 4 8 12 16 20 24 28
2990 2nd vec: 1 5 9 13 17 21 25 29
2991 3rd vec: 2 6 10 14 18 22 26 30
2992 4th vec: 3 7 11 15 19 23 27 31
2994 i.e., the first output vector should contain the first elements of each
2995 interleaving group, etc.
2997 We use extract_even/odd instructions to create such output. The input of each
2998 extract_even/odd operation is two vectors
3000 1st vec 2nd vec
3001 0 1 2 3 4 5 6 7
3002 and the output is the vector of extracted even/odd elements. The output of
3003 extract_even will be: 0 2 4 6
3004 and of extract_odd: 1 3 5 7
3007 The permutation is done in log LENGTH stages. In each stage extract_even and
3008 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3009 order. In our example,
3011 E1: extract_even (1st vec, 2nd vec)
3012 E2: extract_odd (1st vec, 2nd vec)
3013 E3: extract_even (3rd vec, 4th vec)
3014 E4: extract_odd (3rd vec, 4th vec)
3016 The output for the first stage will be:
3018 E1: 0 2 4 6 8 10 12 14
3019 E2: 1 3 5 7 9 11 13 15
3020 E3: 16 18 20 22 24 26 28 30
3021 E4: 17 19 21 23 25 27 29 31
3023 In order to proceed and create the correct sequence for the next stage (or
3024 for the correct output, if the second stage is the last one, as in our
3025 example), we first put the output of extract_even operation and then the
3026 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3027 The input for the second stage is:
3029 1st vec (E1): 0 2 4 6 8 10 12 14
3030 2nd vec (E3): 16 18 20 22 24 26 28 30
3031 3rd vec (E2): 1 3 5 7 9 11 13 15
3032 4th vec (E4): 17 19 21 23 25 27 29 31
3034 The output of the second stage:
3036 E1: 0 4 8 12 16 20 24 28
3037 E2: 2 6 10 14 18 22 26 30
3038 E3: 1 5 9 13 17 21 25 29
3039 E4: 3 7 11 15 19 23 27 31
3041 And RESULT_CHAIN after reordering:
3043 1st vec (E1): 0 4 8 12 16 20 24 28
3044 2nd vec (E3): 1 5 9 13 17 21 25 29
3045 3rd vec (E2): 2 6 10 14 18 22 26 30
3046 4th vec (E4): 3 7 11 15 19 23 27 31. */
3049 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3050 unsigned int length,
3051 gimple stmt,
3052 gimple_stmt_iterator *gsi,
3053 VEC(tree,heap) **result_chain)
3055 tree perm_dest, data_ref, first_vect, second_vect;
3056 gimple perm_stmt;
3057 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3058 int i;
3059 unsigned int j;
3061 /* Check that the operation is supported. */
3062 if (!vect_strided_load_supported (vectype))
3063 return false;
3065 *result_chain = VEC_copy (tree, heap, dr_chain);
3066 for (i = 0; i < exact_log2 (length); i++)
3068 for (j = 0; j < length; j += 2)
3070 first_vect = VEC_index (tree, dr_chain, j);
3071 second_vect = VEC_index (tree, dr_chain, j+1);
3073 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3074 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3075 DECL_GIMPLE_REG_P (perm_dest) = 1;
3076 add_referenced_var (perm_dest);
3078 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3079 perm_dest, first_vect,
3080 second_vect);
3082 data_ref = make_ssa_name (perm_dest, perm_stmt);
3083 gimple_assign_set_lhs (perm_stmt, data_ref);
3084 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3085 mark_symbols_for_renaming (perm_stmt);
3087 VEC_replace (tree, *result_chain, j/2, data_ref);
3089 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3090 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3091 DECL_GIMPLE_REG_P (perm_dest) = 1;
3092 add_referenced_var (perm_dest);
3094 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3095 perm_dest, first_vect,
3096 second_vect);
3097 data_ref = make_ssa_name (perm_dest, perm_stmt);
3098 gimple_assign_set_lhs (perm_stmt, data_ref);
3099 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3100 mark_symbols_for_renaming (perm_stmt);
3102 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3104 dr_chain = VEC_copy (tree, heap, *result_chain);
3107 return true;
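/* Editor's note: the extract_even/odd reordering documented above, as a
   standalone scalar model (LENGTH = 4 vectors of 8 elements).  extract is
   modeled over the concatenation of its two operands; illustrative only,
   guarded with #if 0.  */
#if 0
#include <stdio.h>
#include <string.h>

#define VF 8
#define LEN 4

static void
extract (const int *v1, const int *v2, int *out, int odd)
{
  for (int k = 0; k < VF; k++)           /* concat (v1, v2)[2k + odd] */
    out[k] = (2 * k + odd < VF) ? v1[2 * k + odd] : v2[2 * k + odd - VF];
}

int
main (void)
{
  int chain[LEN][VF], result[LEN][VF];
  for (int i = 0; i < LEN * VF; i++)
    chain[i / VF][i % VF] = i;

  for (int stage = 0; stage < 2; stage++)   /* log2 (LEN) stages */
    {
      for (int j = 0; j < LEN; j += 2)
        {
          extract (chain[j], chain[j + 1], result[j / 2], 0);
          extract (chain[j], chain[j + 1], result[j / 2 + LEN / 2], 1);
        }
      memcpy (chain, result, sizeof chain);
    }

  for (int j = 0; j < LEN; j++)   /* row j: j, j+4, j+8, ... as documented */
    {
      for (int k = 0; k < VF; k++)
        printf ("%d ", chain[j][k]);
      printf ("\n");
    }
  return 0;
}
#endif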
3110 /* Function vect_transform_strided_load.
3112 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3113 to perform their permutation and ascribe the resulting vectorized statements to
3114 the scalar statements. */
3118 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3119 gimple_stmt_iterator *gsi)
3121 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3122 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3123 gimple next_stmt, new_stmt;
3124 VEC(tree,heap) *result_chain = NULL;
3125 unsigned int i, gap_count;
3126 tree tmp_data_ref;
3128 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3129 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3130 vectors, that are ready for vector computation. */
3131 result_chain = VEC_alloc (tree, heap, size);
3133 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3134 return false;
3136 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3137 Since we scan the chain starting from its first node, their order
3138 corresponds to the order of data-refs in RESULT_CHAIN. */
3139 next_stmt = first_stmt;
3140 gap_count = 1;
3141 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3143 if (!next_stmt)
3144 break;
3146 /* Skip the gaps. Loads created for the gaps will be removed by dead
3147 code elimination pass later. No need to check for the first stmt in
3148 the group, since it always exists.
3149 DR_GROUP_GAP is the number of steps in elements from the previous
3150 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3151 correspond to the gaps. */
3153 if (next_stmt != first_stmt
3154 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3162 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3163 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3164 copies, and we put the new vector statement in the first available
3165 RELATED_STMT. */
3166 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3167 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3168 else
3170 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3172 gimple prev_stmt =
3173 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3174 gimple rel_stmt =
3175 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3176 while (rel_stmt)
3178 prev_stmt = rel_stmt;
3179 rel_stmt =
3180 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3183 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3184 new_stmt;
3188 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3189 gap_count = 1;
3190 /* If NEXT_STMT accesses the same DR as the previous statement,
3191 put the same TMP_DATA_REF as its vectorized statement; otherwise
3192 get the next data-ref from RESULT_CHAIN. */
3193 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3194 break;
3198 VEC_free (tree, heap, result_chain);
3199 return true;
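/* Editor's note: a standalone sketch of the gap-skipping walk above.  A
   group of 4 scalar loads with a hole (the third member has DR_GROUP_GAP
   == 2) consumes 5 permuted vectors; the one that feeds the hole is
   skipped.  Hypothetical data, plain C; guarded with #if 0.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int group_gap[4] = { 1, 1, 2, 1 };  /* steps from the previous access */
  const char *result_chain[5] = { "v0", "v1", "v2", "v3", "v4" };
  int next = 0, gap_count = 1;

  for (int i = 0; i < 5 && next < 4; i++)
    {
      if (next != 0 && gap_count < group_gap[next])
        {
          gap_count++;                /* v2 feeds the gap: skipped */
          continue;
        }
      printf ("scalar stmt %d <- %s\n", next, result_chain[i]);
      gap_count = 1;
      next++;
    }
  return 0;
}
#endif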
3202 /* Function vect_can_force_dr_alignment_p.
3204 Returns whether the alignment of a DECL can be forced to be aligned
3205 on an ALIGNMENT bit boundary. */
3208 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3210 if (TREE_CODE (decl) != VAR_DECL)
3211 return false;
3213 if (DECL_EXTERNAL (decl))
3214 return false;
3216 if (TREE_ASM_WRITTEN (decl))
3217 return false;
3219 if (TREE_STATIC (decl))
3220 return (alignment <= MAX_OFILE_ALIGNMENT);
3221 else
3222 return (alignment <= MAX_STACK_ALIGNMENT);
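/* Editor's note: a standalone model of the decision above.  A static
   variable can be realigned up to the object-file limit, an automatic one
   up to the stack limit; external or already-emitted decls cannot be
   touched.  The numeric limits below are hypothetical stand-ins for the
   MAX_OFILE_ALIGNMENT / MAX_STACK_ALIGNMENT target macros; guarded with
   #if 0.  */
#if 0
#include <stdbool.h>
#include <stdio.h>

#define OFILE_ALIGN_LIMIT 256   /* stand-in for MAX_OFILE_ALIGNMENT */
#define STACK_ALIGN_LIMIT 128   /* stand-in for MAX_STACK_ALIGNMENT */

static bool
can_force_alignment (bool is_var_decl, bool is_external,
                     bool is_asm_written, bool is_static,
                     unsigned int alignment)
{
  if (!is_var_decl || is_external || is_asm_written)
    return false;
  return alignment <= (is_static ? OFILE_ALIGN_LIMIT : STACK_ALIGN_LIMIT);
}

int
main (void)
{
  printf ("%d %d\n",
          can_force_alignment (true, false, false, true, 256),    /* 1 */
          can_force_alignment (true, false, false, false, 256));  /* 0 */
  return 0;
}
#endif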
3225 /* Function vect_supportable_dr_alignment
3227 Return whether the data reference DR is supported with respect to its
3228 alignment. */
3230 enum dr_alignment_support
3231 vect_supportable_dr_alignment (struct data_reference *dr)
3233 gimple stmt = DR_STMT (dr);
3234 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3235 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3236 enum machine_mode mode = TYPE_MODE (vectype);
3237 struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
3238 bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3239 bool invariant_in_outerloop = false;
3241 if (aligned_access_p (dr))
3242 return dr_aligned;
3244 if (nested_in_vect_loop)
3246 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3247 invariant_in_outerloop =
3248 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3251 /* Possibly unaligned access. */
3253 /* We can choose between using the implicit realignment scheme (generating
3254 a misaligned_move stmt) and the explicit realignment scheme (generating
3255 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3256 realignment scheme: optimized, and unoptimized.
3257 We can optimize the realignment only if the step between consecutive
3258 vector loads is equal to the vector size. Since the vector memory
3259 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3260 is guaranteed that the misalignment amount remains the same throughout the
3261 execution of the vectorized loop. Therefore, we can create the
3262 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3263 at the loop preheader.
3265 However, in the case of outer-loop vectorization, when vectorizing a
3266 memory access in the inner-loop nested within the LOOP that is now being
3267 vectorized, while it is guaranteed that the misalignment of the
3268 vectorized memory access will remain the same in different outer-loop
3269 iterations, it is *not* guaranteed that it will remain the same throughout
3270 the execution of the inner-loop. This is because the inner-loop advances
3271 with the original scalar step (and not in steps of VS). If the inner-loop
3272 step happens to be a multiple of VS, then the misalignment remains fixed
3273 and we can use the optimized realignment scheme. For example:
3275 for (i=0; i<N; i++)
3276 for (j=0; j<M; j++)
3277 s += vp[i+j];
3279 When vectorizing the i-loop in the above example, the step between
3280 consecutive vector loads is 1, and so the misalignment does not remain
3281 fixed across the execution of the inner-loop, and the realignment cannot
3282 be optimized (as illustrated in the following pseudo vectorized loop):
3284 for (i=0; i<N; i+=4)
3285 for (j=0; j<M; j++){
3286 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3287 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3288 // (assuming that we start from an aligned address).
3291 We therefore have to use the unoptimized realignment scheme:
3293 for (i=0; i<N; i+=4)
3294 for (j=k; j<M; j+=4)
3295 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3296 // that the misalignment of the initial address is
3297 // 0).
3299 The loop can then be vectorized as follows:
3301 for (k=0; k<4; k++){
3302 rt = get_realignment_token (&vp[k]);
3303 for (i=0; i<N; i+=4){
3305 for (j=k; j<M; j+=4){
3307 va = REALIGN_LOAD <v1,v2,rt>;
3314 if (DR_IS_READ (dr))
3316 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3317 CODE_FOR_nothing
3318 && (!targetm.vectorize.builtin_mask_for_load
3319 || targetm.vectorize.builtin_mask_for_load ()))
3321 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3322 if (nested_in_vect_loop
3323 && (TREE_INT_CST_LOW (DR_STEP (dr))
3324 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3325 return dr_explicit_realign;
3326 else
3327 return dr_explicit_realign_optimized;
3330 if (optab_handler (movmisalign_optab, mode)->insn_code !=
3331 CODE_FOR_nothing)
3332 /* Can't software pipeline the loads, but can at least do them. */
3333 return dr_unaligned_supported;
3337 return dr_unaligned_unsupported;
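/* Editor's note: a standalone model of the misalignment behaviour discussed
   above.  With an aligned start, the misalignment of &vp[i+j] (in elements,
   mod VS) is fixed across outer iterations (i += VS) but rotates across
   inner iterations (j += 1), which is why only the unoptimized
   dr_explicit_realign scheme applies in the nested case with a non-VS inner
   step.  Guarded with #if 0.  */
#if 0
#include <stdio.h>

#define VS 4   /* vector size in elements */

int
main (void)
{
  for (int i = 0; i < 8; i += VS)       /* outer (vectorized) loop   */
    {
      for (int j = 0; j < 8; j++)       /* inner loop, scalar step 1 */
        printf ("%d ", (i + j) % VS);   /* 0 1 2 3 0 1 2 3 each row  */
      printf ("\n");
    }
  return 0;
}
#endif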