2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "tree-vectorizer.h"
146 #include "tree-pass.h"
148 /* Main analysis functions. */
149 static loop_vec_info vect_analyze_loop (struct loop *);
150 static loop_vec_info vect_analyze_loop_form (struct loop *);
151 static bool vect_analyze_data_refs (loop_vec_info);
152 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
153 static bool vect_analyze_scalar_cycles (loop_vec_info);
154 static bool vect_analyze_data_ref_accesses (loop_vec_info);
155 static bool vect_analyze_data_refs_alignment (loop_vec_info);
156 static void vect_compute_data_refs_alignment (loop_vec_info);
157 static bool vect_analyze_operations (loop_vec_info);
159 /* Main code transformation functions. */
160 static void vect_transform_loop (loop_vec_info, struct loops *);
161 static void vect_transform_loop_bound (loop_vec_info);
162 static bool vect_transform_stmt (tree, block_stmt_iterator *);
163 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
164 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
167 static void vect_align_data_ref (tree);
168 static void vect_enhance_data_refs_alignment (loop_vec_info);
170 /* Utility functions for the analyses. */
171 static bool vect_is_simple_use (tree , struct loop *, tree *);
172 static bool exist_non_indexing_operands_for_use_p (tree, tree);
173 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
174 static void vect_mark_relevant (varray_type, tree);
175 static bool vect_stmt_relevant_p (tree, loop_vec_info);
176 static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
177 static bool vect_compute_data_ref_alignment
178 (struct data_reference *, loop_vec_info);
179 static bool vect_analyze_data_ref_access (struct data_reference *);
180 static bool vect_get_first_index (tree, tree *);
181 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
/* NOTE(review): vect_analyze_pointer_ref_access is declared twice below
   (original lines 182 and 186); one declaration is redundant and should be
   removed.  Also, the parameter lists of both declarations appear to have
   been lost in this dump — confirm against the original file.  */
182 static struct data_reference * vect_analyze_pointer_ref_access
184 static tree vect_get_base_and_bit_offset
185 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
186 static struct data_reference * vect_analyze_pointer_ref_access
188 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
189 static tree vect_compute_array_ref_alignment
190 (struct data_reference *, loop_vec_info, tree, tree *);
191 static tree vect_get_ptr_offset (tree, tree, tree *);
/* NOTE(review): "symbl" looks like a typo for "symbol", but the name is used
   consistently elsewhere in the file, so renaming must be done globally.  */
192 static tree vect_get_symbl_and_dr
193 (tree, tree, bool, loop_vec_info, struct data_reference **);
195 /* Utility functions for the code transformation. */
196 static tree vect_create_destination_var (tree, tree);
197 static tree vect_create_data_ref_ptr
198 (tree, block_stmt_iterator *, tree, tree *, bool);
199 static tree vect_create_index_for_vector_ref
200 (struct loop *, block_stmt_iterator *);
201 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
202 static tree get_vectype_for_scalar_type (tree);
203 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
204 static tree vect_get_vec_def_for_operand (tree, tree);
205 static tree vect_init_vector (tree, tree);
206 static void vect_finish_stmt_generation
207 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
/* These three are non-static: they form the external interface for creating
   and destroying the per-loop/per-stmt vectorizer info structs.  */
209 /* Utilities for creation and deletion of vec_info structs. */
210 loop_vec_info new_loop_vec_info (struct loop *loop);
211 void destroy_loop_vec_info (loop_vec_info);
212 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
214 static bool vect_debug_stats (struct loop *loop);
215 static bool vect_debug_details (struct loop *loop);
218 /* Function new_stmt_vec_info.
220 Create and initialize a new stmt_vec_info struct for STMT. */
223 new_stmt_vec_info (tree stmt, struct loop *loop)
/* NOTE(review): the return-type line, braces and "return res;" appear to
   have been dropped from this dump — confirm against the original file.  */
226 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
/* xcalloc zero-fills; the explicit assignments below document the intended
   defaults for each field rather than relying on the zeroing.  */
228 STMT_VINFO_TYPE (res) = undef_vec_info_type;
229 STMT_VINFO_STMT (res) = stmt;
230 STMT_VINFO_LOOP (res) = loop;
231 STMT_VINFO_RELEVANT_P (res) = 0;
232 STMT_VINFO_VECTYPE (res) = NULL;
233 STMT_VINFO_VEC_STMT (res) = NULL;
234 STMT_VINFO_DATA_REF (res) = NULL;
235 STMT_VINFO_MEMTAG (res) = NULL;
236 STMT_VINFO_VECT_DR_BASE (res) = NULL;
242 /* Function new_loop_vec_info.
244 Create and initialize a new loop_vec_info struct for LOOP, as well as
245 stmt_vec_info structs for all the stmts in LOOP. */
248 new_loop_vec_info (struct loop *loop)
252 block_stmt_iterator si;
255 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
257 bbs = get_loop_body (loop);
259 /* Create stmt_info for all stmts in the loop. */
260 for (i = 0; i < loop->num_nodes; i++)
262 basic_block bb = bbs[i];
263 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
265 tree stmt = bsi_stmt (si);
/* Force the operand cache to be up to date before attaching per-stmt
   vectorizer info to the stmt's annotation.  */
268 get_stmt_operands (stmt);
269 ann = stmt_ann (stmt);
270 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
/* Initialize the loop-wide fields; NITERS = -1 means "not yet computed".  */
274 LOOP_VINFO_LOOP (res) = loop;
275 LOOP_VINFO_BBS (res) = bbs;
276 LOOP_VINFO_EXIT_COND (res) = NULL;
277 LOOP_VINFO_NITERS (res) = -1;
278 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
279 LOOP_VINFO_VECT_FACTOR (res) = 0;
280 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
281 "loop_write_datarefs");
282 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
283 "loop_read_datarefs");
288 /* Function destroy_loop_vec_info.
290 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
291 stmts in the loop. */
294 destroy_loop_vec_info (loop_vec_info loop_vinfo)
299 block_stmt_iterator si;
305 loop = LOOP_VINFO_LOOP (loop_vinfo);
307 bbs = LOOP_VINFO_BBS (loop_vinfo);
308 nbbs = loop->num_nodes;
310 for (j = 0; j < nbbs; j++)
312 basic_block bb = bbs[j];
313 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
315 tree stmt = bsi_stmt (si);
316 stmt_ann_t ann = stmt_ann (stmt);
317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Detach the per-stmt info from the annotation.  NOTE(review): stmt_info is
   fetched but no "free (stmt_info)" is visible in this dump — confirm the
   original frees it here, otherwise this would leak.  */
319 set_stmt_info (ann, NULL);
/* Release the loop-wide resources allocated by new_loop_vec_info.  */
323 free (LOOP_VINFO_BBS (loop_vinfo));
324 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
325 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
331 /* Function debug_loop_stats.
333 For vectorization statistics dumps. */
336 vect_debug_stats (struct loop *loop)
339 block_stmt_iterator si;
340 tree node = NULL_TREE;
/* Only emit output when a dump file is open and -fdump ... -stats was
   requested; presumably returns false otherwise (return lines not visible
   in this dump).  */
342 if (!dump_file || !(dump_flags & TDF_STATS))
347 fprintf (dump_file, "\n");
/* Scan the loop header for a stmt with source-location info so the dump
   can name the loop's file and line.  */
356 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
358 node = bsi_stmt (si);
359 if (node && EXPR_P (node) && EXPR_LOCUS (node))
363 if (node && EXPR_P (node) && EXPR_LOCUS (node)
364 && EXPR_FILENAME (node) && EXPR_LINENO (node))
366 fprintf (dump_file, "\nloop at %s:%d: ",
367 EXPR_FILENAME (node), EXPR_LINENO (node));
375 /* Function debug_loop_details.
377 For vectorization debug dumps. */
380 vect_debug_details (struct loop *loop)
383 block_stmt_iterator si;
384 tree node = NULL_TREE;
/* Same shape as vect_debug_stats, but gated on TDF_DETAILS.
   NOTE(review): the two functions duplicate their loop-location logic;
   a shared helper would remove the duplication.  */
386 if (!dump_file || !(dump_flags & TDF_DETAILS))
391 fprintf (dump_file, "\n");
400 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
402 node = bsi_stmt (si);
403 if (node && EXPR_P (node) && EXPR_LOCUS (node))
407 if (node && EXPR_P (node) && EXPR_LOCUS (node)
408 && EXPR_FILENAME (node) && EXPR_LINENO (node))
410 fprintf (dump_file, "\nloop at %s:%d: ",
411 EXPR_FILENAME (node), EXPR_LINENO (node));
419 /* Function vect_get_ptr_offset
421 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
424 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
425 tree vectype ATTRIBUTE_UNUSED,
426 tree *offset ATTRIBUTE_UNUSED)
/* Stub: all parameters are marked ATTRIBUTE_UNUSED; presumably returns
   NULL_TREE until alignment info is wired up (return line not visible).  */
428 /* TODO: Use alignment information. */
433 /* Function vect_get_base_and_bit_offset
435 Return the BASE of the data reference EXPR.
436 If VECTYPE is given, also compute the OFFSET from BASE in bits.
437 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
438 bits of 'a.b[i] + 4B' from a.
441 EXPR - the memory reference that is being analyzed
442 DR - the data_reference struct of the _original_ memory reference
443 (Note: DR_REF (DR) is not necessarily EXPR)
444 VECTYPE - the type that defines the alignment (i.e, we compute
445 alignment relative to TYPE_ALIGN(VECTYPE))
448 BASE (returned value) - the base of the data reference EXPR.
449 E.g, if EXPR is a.b[k].c[i][j] the returned
451 OFFSET - offset of EXPR from BASE in bits
452 BASE_ALIGNED_P - indicates if BASE is aligned
454 If something unexpected is encountered (an unsupported form of data-ref),
455 or if VECTYPE is given but OFFSET cannot be determined:
456 then NULL_TREE is returned. */
459 vect_get_base_and_bit_offset (struct data_reference *dr,
462 loop_vec_info loop_vinfo,
464 bool *base_aligned_p)
/* NOTE(review): this function is a recursive switch over TREE_CODE (expr);
   the case labels, braces and recursive-call lines were largely dropped in
   this dump — reconstruct only from the original file.  */
466 tree this_offset = size_zero_node;
467 tree base = NULL_TREE;
470 struct data_reference *array_dr;
471 enum tree_code code = TREE_CODE (expr);
/* Pessimistic default; set to true only in the proven-aligned cases.  */
473 *base_aligned_p = false;
477 /* These cases end the recursion: */
/* Decl case: base is the decl itself; aligned iff its DECL_ALIGN covers
   the vector type's alignment.  */
479 *offset = size_zero_node;
480 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
481 *base_aligned_p = true;
488 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
491 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
493 base = vect_get_ptr_offset (expr, vectype, offset);
495 *base_aligned_p = true;
499 *base_aligned_p = true;
500 *offset = size_zero_node;
/* Integer-constant case: convert the byte offset to bits.  */
506 *offset = int_const_binop (MULT_EXPR, expr,
507 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
510 /* These cases continue the recursion: */
/* COMPONENT_REF: accumulate the field's bit position, which must be a
   compile-time constant when an alignment answer is required.  */
512 oprnd0 = TREE_OPERAND (expr, 0);
513 oprnd1 = TREE_OPERAND (expr, 1);
515 this_offset = bit_position (oprnd1);
516 if (vectype && !host_integerp (this_offset, 1))
522 oprnd0 = TREE_OPERAND (expr, 0);
527 oprnd0 = TREE_OPERAND (expr, 0);
532 if (DR_REF (dr) != expr)
533 /* Build array data_reference struct if the existing DR_REF
534 doesn't match EXPR. This happens, for example, when the
535 EXPR is *T and T is initialized to &arr[indx]. The DR struct
536 contains information on the access of T, not of arr. In order
537 to continue the analysis, we create a new DR struct that
538 describes the access of arr.
540 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
544 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
545 vectype, &this_offset);
550 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
552 *offset = this_offset;
553 *base_aligned_p = true;
560 /* In case we have a PLUS_EXPR of the form
561 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
562 This is verified in vect_get_symbl_and_dr. */
563 oprnd0 = TREE_OPERAND (expr, 0);
564 oprnd1 = TREE_OPERAND (expr, 1);
/* oprnd1 contributes only an offset; recurse on it to fold that offset in.  */
566 base = vect_get_base_and_bit_offset
567 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
568 if (vectype && !base)
/* Tail of the recursion: resolve the remaining sub-expression, then add the
   offset gathered at this level.  */
578 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
579 loop_vinfo, offset, base_aligned_p);
583 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
584 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
587 if (vect_debug_details (NULL))
589 print_generic_expr (dump_file, expr, TDF_SLIM);
590 fprintf (dump_file, " --> total offset for ref: ");
591 print_generic_expr (dump_file, *offset, TDF_SLIM);
599 /* Function vect_force_dr_alignment_p.
601 Returns whether the alignment of a DECL can be forced to be aligned
602 on ALIGNMENT bit boundary. */
605 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
/* Only plain, non-external VAR_DECLs can have their alignment raised; the
   "return false" lines for the two guards are not visible in this dump.  */
607 if (TREE_CODE (decl) != VAR_DECL)
610 if (DECL_EXTERNAL (decl))
/* Static storage: limited by what the object-file format supports.  */
613 if (TREE_STATIC (decl))
614 return (alignment <= MAX_OFILE_ALIGNMENT);
616 /* This is not 100% correct. The absolute correct stack alignment
617 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
618 PREFERRED_STACK_BOUNDARY is honored by all translation units.
619 However, until someone implements forced stack alignment, SSE
620 isn't really usable without this. */
621 return (alignment <= PREFERRED_STACK_BOUNDARY);
625 /* Function vect_get_new_vect_var.
627 Returns a name for a new variable. The current naming scheme appends the
628 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
629 the name of vectorizer generated variables, and appends that to NAME if
633 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
639 if (var_kind == vect_simple_var)
644 prefix_len = strlen (prefix);
/* With a NAME, build "<prefix><name>"; otherwise the bare prefix is used
   and create_tmp_var will uniquify it.  */
647 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
649 new_vect_var = create_tmp_var (type, prefix);
655 /* Function vect_create_index_for_vector_ref.
657 Create (and return) an index variable, along with it's update chain in the
658 loop. This variable will be used to access a memory location in a vector
662 LOOP: The loop being vectorized.
663 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
664 function can be added here, or in the loop pre-header.
667 Return an index that will be used to index a vector array. It is expected
668 that a pointer to the first vector will be used as the base address for the
671 FORNOW: we are not trying to be efficient, just creating a new index each
672 time from scratch. At this time all vector references could use the same
675 TODO: create only one index to be used by all vector references. Record
676 the index in the LOOP_VINFO the first time this procedure is called and
677 return it on subsequent calls. The increment of this index must be placed
678 just before the conditional expression that ends the single block loop. */
681 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
684 tree indx_before_incr, indx_after_incr;
686 /* It is assumed that the base pointer used for vectorized access contains
687 the address of the first vector. Therefore the index used for vectorized
688 access must be initialized to zero and incremented by 1. */
690 init = integer_zero_node;
691 step = integer_one_node;
693 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
694 create_iv (init, step, NULL_TREE, loop, bsi, false,
695 &indx_before_incr, &indx_after_incr);
/* Callers want the value of the IV before the increment (index of the
   current iteration's vector).  */
697 return indx_before_incr;
701 /* Function vect_create_addr_base_for_vector_ref.
703 Create an expression that computes the address of the first memory location
704 that will be accessed for a data reference.
707 STMT: The statement containing the data reference.
708 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
709 OFFSET: Optional. If supplied, it is added to the initial address.
712 1. Return an SSA_NAME whose value is the address of the memory location of the
713 first vector of the data reference.
714 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
715 these statement(s) which define the returned SSA_NAME.
717 FORNOW: We are only handling array accesses with step 1. */
720 vect_create_addr_base_for_vector_ref (tree stmt,
724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
725 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
726 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
727 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
728 tree base_name = unshare_expr (DR_BASE_NAME (dr));
729 tree ref = DR_REF (dr);
730 tree data_ref_base_type = TREE_TYPE (data_ref_base);
731 tree scalar_type = TREE_TYPE (ref);
732 tree scalar_ptr_type = build_pointer_type (scalar_type);
734 tree init_val, step, init_oval;
736 bool is_ptr_ref, is_array_ref, is_addr_expr;
741 tree addr_base, addr_expr;
744 /* Only the access function of the last index is relevant (i_n in
745 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
746 access_fn = DR_ACCESS_FN (dr, 0);
747 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
/* Presumably a fallback when the evolution is not a simple IV — TODO
   confirm: the guarding condition line is not visible in this dump.  */
749 init_oval = integer_zero_node;
/* Classify the base so we know how to materialize &base[init].  */
751 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
752 && TREE_CODE (data_ref_base) == SSA_NAME;
753 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE;
754 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
755 || TREE_CODE (data_ref_base) == PLUS_EXPR
756 || TREE_CODE (data_ref_base) == MINUS_EXPR;
757 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
759 /** Create: &(base[init_val])
761 if data_ref_base is an ARRAY_TYPE:
764 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
765 base = *((scalar_array *) data_ref_base)
769 array_base = data_ref_base;
770 else /* is_ptr_ref or is_addr_expr */
772 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
773 tree scalar_array_type = build_array_type (scalar_type, 0);
774 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
775 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
776 add_referenced_tmp_var (array_ptr);
/* Gimplify the base expression so it is a valid operand, emitting any
   helper stmts into NEW_STMT_LIST for the caller to insert.  */
778 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
779 add_referenced_tmp_var (dest);
781 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
782 append_to_statement_list_force (new_stmt, new_stmt_list);
784 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
785 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
786 new_temp = make_ssa_name (array_ptr, vec_stmt);
787 TREE_OPERAND (vec_stmt, 0) = new_temp;
788 append_to_statement_list_force (vec_stmt, new_stmt_list);
791 array_base = build_fold_indirect_ref (new_temp);
/* Gimplify the initial index value as well.  */
794 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
795 add_referenced_tmp_var (dest);
796 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
797 append_to_statement_list_force (new_stmt, new_stmt_list);
/* If OFFSET was supplied: init_val = init_val + offset.  */
801 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
802 add_referenced_tmp_var (tmp);
803 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
804 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
805 init_val = make_ssa_name (tmp, vec_stmt);
806 TREE_OPERAND (vec_stmt, 0) = init_val;
807 append_to_statement_list_force (vec_stmt, new_stmt_list);
/* Finally build &array_base[init_val] and copy it into a fresh pointer
   SSA name, which is what we return.  */
810 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
811 NULL_TREE, NULL_TREE);
812 addr_base = build_fold_addr_expr (array_ref);
814 /* addr_expr = addr_base */
815 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
816 get_name (base_name));
817 add_referenced_tmp_var (addr_expr);
818 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
819 new_temp = make_ssa_name (addr_expr, vec_stmt);
820 TREE_OPERAND (vec_stmt, 0) = new_temp;
821 append_to_statement_list_force (vec_stmt, new_stmt_list);
827 /* Function get_vectype_for_scalar_type.
829 Returns the vector type corresponding to SCALAR_TYPE as supported
833 get_vectype_for_scalar_type (tree scalar_type)
835 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
836 int nbytes = GET_MODE_SIZE (inner_mode);
843 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
/* Number of lanes = vector size in bytes / scalar size in bytes.  */
845 nunits = UNITS_PER_SIMD_WORD / nbytes;
847 vectype = build_vector_type (scalar_type, nunits);
848 if (vect_debug_details (NULL))
850 fprintf (dump_file, "get vectype with %d units of type ", nunits);
851 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
857 if (vect_debug_details (NULL))
859 fprintf (dump_file, "vectype: ");
860 print_generic_expr (dump_file, vectype, TDF_SLIM);
/* Reject vector types the target has no machine mode for; presumably
   NULL_TREE is returned in that case (return line not visible here).  */
863 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
865 /* TODO: tree-complex.c sometimes can parallelize operations
866 on generic vectors. We can vectorize the loop in that case,
867 but then we should re-run the lowering pass. */
868 if (vect_debug_details (NULL))
869 fprintf (dump_file, "mode not supported by target.");
877 /* Function vect_align_data_ref.
879 Handle misalignment of memory accesses.
881 FORNOW: Can't handle misaligned accesses.
882 Make sure that the dataref is aligned. */
885 vect_align_data_ref (tree stmt)
887 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
888 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
890 /* FORNOW: can't handle misaligned accesses;
891 all accesses expected to be aligned. */
892 gcc_assert (aligned_access_p (dr));
896 /* Function vect_create_data_ref_ptr.
898 Create a memory reference expression for vector access, to be used in a
899 vector load/store stmt. The reference is based on a new pointer to vector
903 1. STMT: a stmt that references memory. Expected to be of the form
904 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
905 2. BSI: block_stmt_iterator where new stmts can be added.
906 3. OFFSET (optional): an offset to be added to the initial address accessed
907 by the data-ref in STMT.
908 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
909 pointing to the initial address.
912 1. Declare a new ptr to vector_type, and have it point to the base of the
913 data reference (initial addressed accessed by the data reference).
914 For example, for vector of type V8HI, the following code is generated:
917 vp = (v8hi *)initial_address;
919 if OFFSET is not supplied:
920 initial_address = &a[init];
921 if OFFSET is supplied:
922 initial_address = &a[init + OFFSET];
924 Return the initial_address in INITIAL_ADDRESS.
926 2. Create a data-reference in the loop based on the new vector pointer vp,
927 and using a new index variable 'idx' as follows:
931 where if ONLY_INIT is true:
934 update = idx + vector_type_size
936 Return the pointer vp'.
939 FORNOW: handle only aligned and consecutive accesses. */
942 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
943 tree *initial_address, bool only_init)
946 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
947 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
948 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
949 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
953 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
954 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
955 vuse_optype vuses = STMT_VUSE_OPS (stmt);
956 int nvuses, nv_may_defs, nv_must_defs;
960 tree new_stmt_list = NULL_TREE;
962 edge pe = loop_preheader_edge (loop);
969 base_name = unshare_expr (DR_BASE_NAME (dr));
970 if (vect_debug_details (NULL))
972 tree data_ref_base = base_name;
973 fprintf (dump_file, "create array_ref of type: ");
974 print_generic_expr (dump_file, vectype, TDF_SLIM);
975 if (TREE_CODE (data_ref_base) == VAR_DECL)
976 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
977 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
978 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
979 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
980 fprintf (dump_file, "vectorizing a record based array ref: ");
981 else if (TREE_CODE (data_ref_base) == SSA_NAME)
982 fprintf (dump_file, "vectorizing a pointer ref: ");
983 print_generic_expr (dump_file, base_name, TDF_SLIM);
986 /** (1) Create the new vector-pointer variable: **/
988 vect_ptr_type = build_pointer_type (vectype);
989 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
990 get_name (base_name));
991 add_referenced_tmp_var (vect_ptr);
994 /** (2) Handle aliasing information of the new vector-pointer: **/
996 tag = STMT_VINFO_MEMTAG (stmt_info);
998 get_var_ann (vect_ptr)->type_mem_tag = tag;
1000 /* Mark for renaming all aliased variables
1001 (i.e, the may-aliases of the type-mem-tag). */
1002 nvuses = NUM_VUSES (vuses);
1003 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
1004 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
1005 for (i = 0; i < nvuses; i++)
1007 tree use = VUSE_OP (vuses, i);
1008 if (TREE_CODE (use) == SSA_NAME)
1009 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
1011 for (i = 0; i < nv_may_defs; i++)
1013 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
1014 if (TREE_CODE (def) == SSA_NAME)
1015 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1017 for (i = 0; i < nv_must_defs; i++)
1019 tree def = V_MUST_DEF_OP (v_must_defs, i);
1020 if (TREE_CODE (def) == SSA_NAME)
1021 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1025 /** (3) Calculate the initial address the vector-pointer, and set
1026 the vector-pointer to point to it before the loop: **/
1028 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1029 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1031 pe = loop_preheader_edge (loop);
/* bsi_insert_on_edge_immediate must not have split the edge (no new bb).  */
1032 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1033 gcc_assert (!new_bb);
1034 *initial_address = new_temp;
1036 /* Create: p = (vectype *) initial_base */
1037 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1038 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1039 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1040 TREE_OPERAND (vec_stmt, 0) = new_temp;
1041 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1042 gcc_assert (!new_bb);
1043 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1046 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1048 if (only_init) /* No update in loop is required. */
1049 return vect_ptr_init;
1051 idx = vect_create_index_for_vector_ref (loop, bsi);
1053 /* Create: update = idx * vectype_size */
1054 ptr_update = create_tmp_var (integer_type_node, "update");
1055 add_referenced_tmp_var (ptr_update);
1056 vectype_size = build_int_cst (integer_type_node,
1057 GET_MODE_SIZE (TYPE_MODE (vectype)));
1058 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1059 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1060 new_temp = make_ssa_name (ptr_update, vec_stmt);
1061 TREE_OPERAND (vec_stmt, 0) = new_temp;
1062 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1064 /* Create: data_ref_ptr = vect_ptr_init + update */
1065 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1066 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1067 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1068 TREE_OPERAND (vec_stmt, 0) = new_temp;
1069 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1070 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1072 return data_ref_ptr;
1076 /* Function vect_create_destination_var.
1078 Create a new temporary of type VECTYPE. */
1081 vect_create_destination_var (tree scalar_dest, tree vectype)
1084 const char *new_name;
1086 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
/* Reuse the scalar destination's name (if any) for readability of dumps.  */
1088 new_name = get_name (scalar_dest);
1091 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1092 add_referenced_tmp_var (vec_dest);
1098 /* Function vect_init_vector.
1100 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1101 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1102 used in the vectorization of STMT. */
1105 vect_init_vector (tree stmt, tree vector_var)
1107 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1108 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1111 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1117 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1118 add_referenced_tmp_var (new_var);
/* Build "new_var = vector_var" and rewrite the LHS into a fresh SSA name.  */
1120 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1121 new_temp = make_ssa_name (new_var, init_stmt);
1122 TREE_OPERAND (init_stmt, 0) = new_temp;
/* Loop-invariant init: emit it once on the preheader edge; the insertion
   must not create a new basic block.  */
1124 pe = loop_preheader_edge (loop);
1125 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1126 gcc_assert (!new_bb);
1128 if (vect_debug_details (NULL))
1130 fprintf (dump_file, "created new init_stmt: ");
1131 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1134 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1139 /* Function vect_get_vec_def_for_operand.
1141 OP is an operand in STMT. This function returns a (vector) def that will be
1142 used in the vectorized stmt for STMT.
1144 In the case that OP is an SSA_NAME which is defined in the loop, then
1145 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1147 In case OP is an invariant or constant, a new stmt that creates a vector def
1148 needs to be introduced. */
1151 vect_get_vec_def_for_operand (tree op, tree stmt)
1156 stmt_vec_info def_stmt_info = NULL;
1157 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1158 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
/* Number of elements in one vector of VECTYPE.  */
1159 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1160 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1167 if (vect_debug_details (NULL))
1169 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
1170 print_generic_expr (dump_file, op, TDF_SLIM);
1173 /** ===> Case 1: operand is a constant. **/
1175 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
1177 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1181 /* Build a tree with vector elements. */
1182 if (vect_debug_details (NULL))
1183 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
/* Replicate the scalar constant NUNITS times into a TREE_LIST, then
   wrap it into a VECTOR_CST; the vector def is materialized in the
   loop preheader by vect_init_vector.  */
1185 for (i = nunits - 1; i >= 0; --i)
1187 t = tree_cons (NULL_TREE, op, t);
1189 vec_cst = build_vector (vectype, t);
1190 return vect_init_vector (stmt, vec_cst);
1193 gcc_assert (TREE_CODE (op) == SSA_NAME);
1195 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
1197 def_stmt = SSA_NAME_DEF_STMT (op);
1198 def_stmt_info = vinfo_for_stmt (def_stmt);
1200 if (vect_debug_details (NULL))
1202 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
1203 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1207 /** ==> Case 2.1: operand is defined inside the loop. **/
1211 /* Get the def from the vectorized stmt. */
/* The defining stmt was already vectorized; reuse the LHS of its
   vectorized counterpart.  */
1213 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1214 gcc_assert (vec_stmt);
1215 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
1220 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
1221 it is a reduction/induction. **/
1223 bb = bb_for_stmt (def_stmt);
1224 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1226 if (vect_debug_details (NULL))
1227 fprintf (dump_file, "reduction/induction - unsupported.");
1228 internal_error ("no support for reduction/induction"); /* FORNOW */
1232 /** ==> Case 2.3: operand is defined outside the loop -
1233 it is a loop invariant. */
/* Extract the invariant value according to the form of the defining
   stmt: a phi result, a modify_expr LHS, or (for an empty stmt, i.e.
   a function argument) the argument itself.  */
1235 switch (TREE_CODE (def_stmt))
1238 def = PHI_RESULT (def_stmt);
1241 def = TREE_OPERAND (def_stmt, 0);
1244 def = TREE_OPERAND (def_stmt, 0);
1245 gcc_assert (IS_EMPTY_STMT (def_stmt));
1249 if (vect_debug_details (NULL))
1251 fprintf (dump_file, "unsupported defining stmt: ");
1252 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1254 internal_error ("unsupported defining stmt");
1257 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
1259 if (vect_debug_details (NULL))
1260 fprintf (dump_file, "Create vector_inv.");
1262 for (i = nunits - 1; i >= 0; --i)
1264 t = tree_cons (NULL_TREE, def, t);
/* A CONSTRUCTOR (not a VECTOR_CST) because the invariant need not be
   a compile-time constant.  */
1267 vec_inv = build_constructor (vectype, t);
1268 return vect_init_vector (stmt, vec_inv);
1272 /* Function vect_finish_stmt_generation.
1274 Insert a new stmt. */
1277 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
/* Place the new vector stmt immediately before the scalar stmt being
   vectorized; BSI_SAME_STMT keeps BSI pointing at the current stmt.  */
1279 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1281 if (vect_debug_details (NULL))
1283 fprintf (dump_file, "add new stmt: ");
1284 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1287 /* Make sure bsi points to the stmt that is being vectorized. */
1289 /* Assumption: any stmts created for the vectorization of stmt S were
1290 inserted before S. BSI is expected to point to S or some new stmt before S. */
/* NOTE(review): the body of this while loop (presumably a bsi_next
   call) is elided in this excerpt -- verify against the full source.  */
1292 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
1294 gcc_assert (stmt == bsi_stmt (*bsi));
1298 /* Function vectorizable_assignment.
1300 Check if STMT performs an assignment (copy) that can be vectorized.
1301 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1302 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1303 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1306 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1312 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1313 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1314 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1317 /* Is vectorizable assignment? */
/* Must be "ssa_name = simple_use": a modify_expr whose LHS is an SSA
   name and whose RHS is a constant, invariant, or loop-defined name.  */
1319 if (TREE_CODE (stmt) != MODIFY_EXPR)
1322 scalar_dest = TREE_OPERAND (stmt, 0);
1323 if (TREE_CODE (scalar_dest) != SSA_NAME)
1326 op = TREE_OPERAND (stmt, 1);
1327 if (!vect_is_simple_use (op, loop, NULL))
1329 if (vect_debug_details (NULL))
1330 fprintf (dump_file, "use not simple.");
/* Analysis-only mode (VEC_STMT == NULL): just record the stmt kind.  */
1334 if (!vec_stmt) /* transformation not required. */
1336 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1341 if (vect_debug_details (NULL))
1342 fprintf (dump_file, "transform assignment.");
/* Handle def: create the vector destination variable.  */
1345 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle use: fetch or create the vector def for the RHS operand.  */
1348 op = TREE_OPERAND (stmt, 1);
1349 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1351 /* Arguments are ready. create the new vector stmt. */
1352 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1353 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1354 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1355 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1361 /* Function vectorizable_operation.
1363 Check if STMT performs a binary or unary operation that can be vectorized.
1364 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1365 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1366 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1369 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1374 tree op0, op1 = NULL;
1375 tree vec_oprnd0, vec_oprnd1=NULL;
1376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1377 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1378 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1380 enum tree_code code;
1381 enum machine_mode vec_mode;
1387 /* Is STMT a vectorizable binary/unary operation? */
1388 if (TREE_CODE (stmt) != MODIFY_EXPR)
1391 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Map the tree operation to the optab that would implement it on
   vectors of VECTYPE.  */
1394 operation = TREE_OPERAND (stmt, 1);
1395 code = TREE_CODE (operation);
1396 optab = optab_for_tree_code (code, vectype);
1398 /* Support only unary or binary operations. */
1399 op_type = TREE_CODE_LENGTH (code);
1400 if (op_type != unary_op && op_type != binary_op)
1402 if (vect_debug_details (NULL))
1403 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* Every operand must be a constant, invariant, or loop-defined name.  */
1407 for (i = 0; i < op_type; i++)
1409 op = TREE_OPERAND (operation, i);
1410 if (!vect_is_simple_use (op, loop, NULL))
1412 if (vect_debug_details (NULL))
1413 fprintf (dump_file, "use not simple.");
1418 /* Supportable by target? */
/* NOTE(review): the "if (!optab)" test that guards this dump appears
   elided in this excerpt -- verify against the full source.  */
1421 if (vect_debug_details (NULL))
1422 fprintf (dump_file, "no optab.");
/* The target must provide an insn for this operation in vector mode.  */
1425 vec_mode = TYPE_MODE (vectype);
1426 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1428 if (vect_debug_details (NULL))
1429 fprintf (dump_file, "op not supported by target.");
/* Analysis-only mode: just record the stmt kind.  */
1433 if (!vec_stmt) /* transformation not required. */
1435 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1441 if (vect_debug_details (NULL))
1442 fprintf (dump_file, "transform binary/unary operation.");
/* Handle def.  */
1445 scalar_dest = TREE_OPERAND (stmt, 0);
1446 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle uses: fetch/create a vector def for each scalar operand.  */
1449 op0 = TREE_OPERAND (operation, 0);
1450 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
1452 if (op_type == binary_op)
1454 op1 = TREE_OPERAND (operation, 1);
1455 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
1458 /* Arguments are ready. create the new vector stmt. */
1460 if (op_type == binary_op)
1461 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1462 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1464 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1465 build1 (code, vectype, vec_oprnd0));
1466 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1467 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1468 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1474 /* Function vectorizable_store.
1476 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1478 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1479 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1480 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1483 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1489 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1490 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1491 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1492 enum machine_mode vec_mode;
1495 /* Is vectorizable store? */
/* Must be "mem_ref = simple_use": LHS an array or indirect ref, RHS a
   constant, invariant, or loop-defined name.  */
1497 if (TREE_CODE (stmt) != MODIFY_EXPR)
1500 scalar_dest = TREE_OPERAND (stmt, 0);
1501 if (TREE_CODE (scalar_dest) != ARRAY_REF
1502 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1505 op = TREE_OPERAND (stmt, 1);
1506 if (!vect_is_simple_use (op, loop, NULL))
1508 if (vect_debug_details (NULL))
1509 fprintf (dump_file, "use not simple.");
/* The target must support plain vector moves in this mode.  */
1513 vec_mode = TYPE_MODE (vectype);
1514 /* FORNOW. In some cases can vectorize even if data-type not supported
1515 (e.g. - array initialization with 0). */
1516 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
/* A data-reference must have been recorded during analysis, and --
   FORNOW -- it must be an aligned access.  */
1519 if (!STMT_VINFO_DATA_REF (stmt_info))
1522 if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
/* Analysis-only mode: just record the stmt kind.  */
1525 if (!vec_stmt) /* transformation not required. */
1527 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1533 if (vect_debug_details (NULL))
1534 fprintf (dump_file, "transform store");
1536 /* Handle use - get the vectorized def from the defining stmt. */
1537 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
1540 /* FORNOW: make sure the data reference is aligned. */
1541 vect_align_data_ref (stmt);
/* Build the vector pointer for the store destination and dereference
   it to form the memory operand.  */
1542 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1543 data_ref = build_fold_indirect_ref (data_ref);
1545 /* Arguments are ready. create the new vector stmt. */
1546 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1547 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1553 /* vectorizable_load.
1555 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1557 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1558 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1559 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1562 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1565 tree vec_dest = NULL;
1566 tree data_ref = NULL;
1568 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1569 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1570 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1577 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1578 edge pe = loop_preheader_edge (loop);
/* Set when the misaligned load must be emulated by two aligned loads
   plus a realign (software-pipelined) combine.  */
1579 bool software_pipeline_loads_p = false;
1581 /* Is vectorizable load? */
/* Must be "ssa_name = mem_ref", with a data-ref recorded in analysis.  */
1583 if (TREE_CODE (stmt) != MODIFY_EXPR)
1586 scalar_dest = TREE_OPERAND (stmt, 0);
1587 if (TREE_CODE (scalar_dest) != SSA_NAME)
1590 op = TREE_OPERAND (stmt, 1);
1591 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1594 if (!STMT_VINFO_DATA_REF (stmt_info))
1597 mode = (int) TYPE_MODE (vectype);
1599 /* FORNOW. In some cases can vectorize even if data-type not supported
1600 (e.g. - data copies). */
1601 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1603 if (vect_debug_details (loop))
1604 fprintf (dump_file, "Aligned load, but unsupported type.");
/* For a misaligned access: prefer the realign-load scheme when the
   target provides it; otherwise accept only if the target tolerates
   misaligned memory accesses in this mode.  */
1608 if (!aligned_access_p (dr))
1610 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1611 && (!targetm.vectorize.builtin_mask_for_load
1612 || targetm.vectorize.builtin_mask_for_load ()))
1613 software_pipeline_loads_p = true;
1614 else if (!targetm.vectorize.misaligned_mem_ok (mode))
1616 /* Possibly unaligned access, and can't software pipeline the loads */
1617 if (vect_debug_details (loop))
1618 fprintf (dump_file, "Arbitrary load not supported.");
/* Analysis-only mode: just record the stmt kind.  */
1623 if (!vec_stmt) /* transformation not required. */
1625 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1631 if (vect_debug_details (NULL))
1632 fprintf (dump_file, "transform load.");
/* Simple case: emit a single (possibly misaligned-indirect) load.  */
1634 if (!software_pipeline_loads_p)
1645 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1646 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1647 if (aligned_access_p (dr))
1648 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned-but-tolerated case: wrap in MISALIGNED_INDIRECT_REF,
   carrying the misalignment (in bits) when it is known.  */
1651 int mis = DR_MISALIGNMENT (dr);
1652 tree tmis = (mis == -1 ?
1654 build_int_cst (integer_type_node, mis));
1655 tmis = int_const_binop (MULT_EXPR, tmis,
1656 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
1657 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1659 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1660 new_temp = make_ssa_name (vec_dest, new_stmt);
1661 TREE_OPERAND (new_stmt, 0) = new_temp;
1662 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1664 else /* software-pipeline the loads */
/* Realign-load scheme (sketched below): load the aligned vector at or
   below the address (msq), the next aligned vector (lsq), and combine
   them with REALIGN_LOAD using a permutation mask ("magic").  */
1668 msq_init = *(floor(p1))
1669 p2 = initial_addr + VS - 1;
1670 magic = have_builtin ? builtin_result : initial_address;
1673 p2' = p2 + indx * vectype_size
1675 vec_dest = realign_load (msq, lsq, magic)
1689 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1690 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1691 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1693 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1694 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1695 new_temp = make_ssa_name (vec_dest, new_stmt);
1696 TREE_OPERAND (new_stmt, 0) = new_temp;
1697 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1698 gcc_assert (!new_bb);
1699 msq_init = TREE_OPERAND (new_stmt, 0);
1702 /* <2> Create lsq = *(floor(p2')) in the loop */
/* OFFSET = VS - 1 elements, so p2' points one vector (minus one
   element) past the original address.  */
1703 offset = build_int_cst (integer_type_node,
1704 GET_MODE_NUNITS (TYPE_MODE (vectype)));
1705 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1706 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1707 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1708 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1709 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1710 new_temp = make_ssa_name (vec_dest, new_stmt);
1711 TREE_OPERAND (new_stmt, 0) = new_temp;
1712 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1713 lsq = TREE_OPERAND (new_stmt, 0);
/* <3> Compute the "magic" operand for REALIGN_LOAD.  */
1717 if (targetm.vectorize.builtin_mask_for_load)
1719 /* Create permutation mask, if required, in loop preheader. */
1721 params = build_tree_list (NULL_TREE, init_addr);
1722 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1723 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1724 new_stmt = build_function_call_expr (builtin_decl, params);
1725 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1726 new_temp = make_ssa_name (vec_dest, new_stmt);
1727 TREE_OPERAND (new_stmt, 0) = new_temp;
1728 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1729 gcc_assert (!new_bb);
1730 magic = TREE_OPERAND (new_stmt, 0);
1734 /* Use current address instead of init_addr for reduced reg pressure. */
1735 magic = dataref_ptr;
1739 /* <4> Create msq = phi <msq_init, lsq> in loop */
/* Each iteration's msq is the previous iteration's lsq; the first
   iteration uses msq_init from the preheader.  */
1740 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1741 msq = make_ssa_name (vec_dest, NULL_TREE);
1742 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1743 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1744 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
1745 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
1748 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1749 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1750 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1751 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1752 new_temp = make_ssa_name (vec_dest, new_stmt);
1753 TREE_OPERAND (new_stmt, 0) = new_temp;
1754 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1757 *vec_stmt = new_stmt;
1762 /* Function vect_transform_stmt.
1764 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1767 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1769 bool is_store = false;
1770 tree vec_stmt = NULL_TREE;
1771 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Dispatch on the stmt kind recorded during the analysis phase; each
   vectorizable_* routine both rewrote STMT and filled VEC_STMT.  */
1774 switch (STMT_VINFO_TYPE (stmt_info))
1776 case op_vec_info_type:
1777 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1781 case assignment_vec_info_type:
1782 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1786 case load_vec_info_type:
1787 done = vectorizable_load (stmt, bsi, &vec_stmt);
1791 case store_vec_info_type:
1792 done = vectorizable_store (stmt, bsi, &vec_stmt);
1797 if (vect_debug_details (NULL))
1798 fprintf (dump_file, "stmt not supported.");
/* Remember the vectorized counterpart so later stmts that use this
   def can find it (see vect_get_vec_def_for_operand).  */
1802 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1808 /* Function vect_transform_loop_bound.
1810 Create a new exit condition for the loop. */
1813 vect_transform_loop_bound (loop_vec_info loop_vinfo)
1815 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1816 edge exit_edge = loop->single_exit;
1817 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
1818 tree indx_before_incr, indx_after_incr;
1819 tree orig_cond_expr;
1820 HOST_WIDE_INT old_N = 0;
1823 tree new_loop_bound;
/* The scalar iteration count must be a known constant.  */
1827 gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1828 old_N = LOOP_VINFO_NITERS (loop_vinfo);
1829 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1832 assuming number-of-iterations divides by the vectorization factor. */
1833 gcc_assert (!(old_N % vf));
/* The original COND_EXPR must be the last stmt of the exit block.  */
1835 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
1836 gcc_assert (orig_cond_expr);
1837 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
/* Create a fresh counter IV (0, 1, 2, ...) for the new exit test.  */
1839 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
1840 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
1842 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1843 to point to the exit condition. */
1844 bsi_next (&loop_exit_bsi);
1845 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
1847 /* new loop exit test: */
/* New bound is old_N/vf iterations; its type is taken from the second
   operand of the original comparison.  */
1848 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
1849 new_loop_bound = build_int_cst (lb_type, old_N/vf);
/* Preserve the branch sense of the original exit condition.  */
1851 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
1852 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1853 else /* 'then' edge loops back. */
1854 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
/* Rebuild the COND_EXPR with the new predicate but the original arms.  */
1856 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
1857 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
1859 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
1861 /* remove old loop exit test: */
1862 bsi_remove (&loop_exit_bsi);
1864 if (vect_debug_details (NULL))
1865 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
1869 /* Function vect_transform_loop.
1871 The analysis phase has determined that the loop is vectorizable.
1872 Vectorize the loop - created vectorized stmts to replace the scalar
1873 stmts in the loop, and update the loop exit condition. */
1876 vect_transform_loop (loop_vec_info loop_vinfo,
1877 struct loops *loops ATTRIBUTE_UNUSED)
1879 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1880 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1881 int nbbs = loop->num_nodes;
1882 block_stmt_iterator si;
1884 #ifdef ENABLE_CHECKING
1885 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1888 if (vect_debug_details (NULL))
1889 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
1891 /* 1) Make sure the loop header has exactly two entries
1892 2) Make sure we have a preheader basic block. */
1894 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
/* Split the preheader edge so later edge-insertions land in a
   dedicated preheader block.  */
1896 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1899 /* FORNOW: the vectorizer supports only loops which body consist
1900 of one basic block (header + empty latch). When the vectorizer will
1901 support more involved loop forms, the order by which the BBs are
1902 traversed need to be reconsidered. */
1904 for (i = 0; i < nbbs; i++)
1906 basic_block bb = bbs[i];
/* NOTE(review): the iterator is advanced inside the body (not here),
   since vectorized stores remove the scalar stmt -- elided lines
   presumably handle the bsi_next/bsi_remove choice.  */
1908 for (si = bsi_start (bb); !bsi_end_p (si);)
1910 tree stmt = bsi_stmt (si);
1911 stmt_vec_info stmt_info;
1914 if (vect_debug_details (NULL))
1916 fprintf (dump_file, "------>vectorizing statement: ");
1917 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Skip stmts that the analysis phase marked as not relevant.  */
1919 stmt_info = vinfo_for_stmt (stmt);
1920 gcc_assert (stmt_info);
1921 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1926 #ifdef ENABLE_CHECKING
1927 /* FORNOW: Verify that all stmts operate on the same number of
1928 units and no inner unrolling is necessary. */
1929 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
1930 == vectorization_factor);
1932 /* -------- vectorize statement ------------ */
1933 if (vect_debug_details (NULL))
1934 fprintf (dump_file, "transform statement.");
1936 is_store = vect_transform_stmt (stmt, &si);
1939 /* free the attached stmt_vec_info and remove the stmt. */
1940 stmt_ann_t ann = stmt_ann (stmt);
1942 set_stmt_info (ann, NULL);
/* Replace the scalar loop bound with niters/vf.  */
1951 vect_transform_loop_bound (loop_vinfo);
1953 if (vect_debug_details (loop))
1954 fprintf (dump_file,"Success! loop vectorized.");
1955 if (vect_debug_stats (loop))
1956 fprintf (dump_file, "LOOP VECTORIZED.");
1960 /* Function vect_is_simple_use.
1963 LOOP - the loop that is being vectorized.
1964 OPERAND - operand of a stmt in LOOP.
1965 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1967 Returns whether a stmt with OPERAND can be vectorized.
1968 Supportable operands are constants, loop invariants, and operands that are
1969 defined by the current iteration of the loop. Unsupportable operands are
1970 those that are defined by a previous iteration of the loop (as is the case
1971 in reduction/induction computations). */
1974 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
/* Constants are always usable.  */
1982 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1985 if (TREE_CODE (operand) != SSA_NAME)
1988 def_stmt = SSA_NAME_DEF_STMT (operand);
1989 if (def_stmt == NULL_TREE )
1991 if (vect_debug_details (NULL))
1992 fprintf (dump_file, "no def_stmt.");
1996 /* empty stmt is expected only in case of a function argument.
1997 (Otherwise - we expect a phi_node or a modify_expr). */
1998 if (IS_EMPTY_STMT (def_stmt))
2000 tree arg = TREE_OPERAND (def_stmt, 0);
2001 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
2003 if (vect_debug_details (NULL))
2005 fprintf (dump_file, "Unexpected empty stmt: ");
2006 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2011 /* phi_node inside the loop indicates an induction/reduction pattern.
2012 This is not supported yet. */
2013 bb = bb_for_stmt (def_stmt);
2014 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2016 if (vect_debug_details (NULL))
2017 fprintf (dump_file, "reduction/induction - unsupported.");
2018 return false; /* FORNOW: not supported yet. */
2021 /* Expecting a modify_expr or a phi_node. */
/* A phi outside the loop is a loop invariant; a modify_expr is an
   ordinary in-loop def.  Both are acceptable.  */
2022 if (TREE_CODE (def_stmt) == MODIFY_EXPR
2023 || TREE_CODE (def_stmt) == PHI_NODE)
2034 /* Function vect_analyze_operations.
2036 Scan the loop stmts and make sure they are all vectorizable. */
2039 vect_analyze_operations (loop_vec_info loop_vinfo)
2041 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2042 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2043 int nbbs = loop->num_nodes;
2044 block_stmt_iterator si;
2045 int vectorization_factor = 0;
2050 if (vect_debug_details (NULL))
2051 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
2053 for (i = 0; i < nbbs; i++)
2055 basic_block bb = bbs[i];
2057 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2059 tree stmt = bsi_stmt (si);
2061 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2064 if (vect_debug_details (NULL))
2066 fprintf (dump_file, "==> examining statement: ");
2067 print_generic_expr (dump_file, stmt, TDF_SLIM);
2070 gcc_assert (stmt_info);
2072 /* skip stmts which do not need to be vectorized.
2073 this is expected to include:
2074 - the COND_EXPR which is the loop exit condition
2075 - any LABEL_EXPRs in the loop
2076 - computations that are used only for array indexing or loop
2079 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2081 if (vect_debug_details (NULL))
2082 fprintf (dump_file, "irrelevant.");
/* Pre-existing vector code in the loop is not handled.  */
2086 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
2088 if (vect_debug_stats (loop) || vect_debug_details (loop))
2090 fprintf (dump_file, "not vectorized: vector stmt in loop:");
2091 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Determine the scalar type of the stmt: the referenced memory type
   for data-refs, otherwise the LHS type (or the stmt type).  */
2096 if (STMT_VINFO_DATA_REF (stmt_info))
2097 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
2098 else if (TREE_CODE (stmt) == MODIFY_EXPR)
2099 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
2101 scalar_type = TREE_TYPE (stmt);
2103 if (vect_debug_details (NULL))
2105 fprintf (dump_file, "get vectype for scalar type: ");
2106 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
/* Map the scalar type to a target-supported vector type; failure
   means this data-type cannot be vectorized.  */
2109 vectype = get_vectype_for_scalar_type (scalar_type);
2112 if (vect_debug_stats (loop) || vect_debug_details (loop))
2114 fprintf (dump_file, "not vectorized: unsupported data-type ");
2115 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2120 if (vect_debug_details (NULL))
2122 fprintf (dump_file, "vectype: ");
2123 print_generic_expr (dump_file, vectype, TDF_SLIM);
2125 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* Analysis-mode calls (NULL bsi/vec_stmt): the stmt must be accepted
   by at least one of the vectorizable_* checks.  */
2127 ok = (vectorizable_operation (stmt, NULL, NULL)
2128 || vectorizable_assignment (stmt, NULL, NULL)
2129 || vectorizable_load (stmt, NULL, NULL)
2130 || vectorizable_store (stmt, NULL, NULL));
2134 if (vect_debug_stats (loop) || vect_debug_details (loop))
2136 fprintf (dump_file, "not vectorized: stmt not supported: ");
2137 print_generic_expr (dump_file, stmt, TDF_SLIM);
2142 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2143 if (vect_debug_details (NULL))
2144 fprintf (dump_file, "nunits = %d", nunits);
2146 if (vectorization_factor)
2148 /* FORNOW: don't allow mixed units.
2149 This restriction will be relaxed in the future. */
2150 if (nunits != vectorization_factor)
2152 if (vect_debug_stats (loop) || vect_debug_details (loop))
2153 fprintf (dump_file, "not vectorized: mixed data-types");
/* First vectorizable stmt seen sets the loop's vectorization factor.  */
2158 vectorization_factor = nunits;
2160 #ifdef ENABLE_CHECKING
2161 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
2162 * vectorization_factor == UNITS_PER_SIMD_WORD);
2167 /* TODO: Analyze cost. Decide if worth while to vectorize. */
2169 if (vectorization_factor <= 1)
2171 if (vect_debug_stats (loop) || vect_debug_details (loop))
2172 fprintf (dump_file, "not vectorized: unsupported data-type")
2175 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
2177 /* FORNOW: handle only cases where the loop bound divides by the
2178 vectorization factor. */
2180 if (vect_debug_details (NULL))
2182 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
2183 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
2185 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2187 if (vect_debug_stats (loop) || vect_debug_details (loop))
2188 fprintf (dump_file, "not vectorized: Unknown loop bound.");
2192 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2193 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
2195 if (vect_debug_stats (loop) || vect_debug_details (loop))
2196 fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
2197 vectorization_factor);
2205 /* Function exist_non_indexing_operands_for_use_p
2207 USE is one of the uses attached to STMT. Check if USE is
2208 used in STMT for anything other than indexing an array. */
2211 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
2214 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2216 /* USE corresponds to some operand in STMT. If there is no data
2217 reference in STMT, then any operand that corresponds to USE
2218 is not indexing an array. */
2219 if (!STMT_VINFO_DATA_REF (stmt_info))
2222 /* STMT has a data_ref. FORNOW this means that its of one of
2223 the following forms:
2226 (This should have been verified in analyze_data_refs).
2228 'var' in the second case corresponds to a def, not a use,
2229 so USE cannot correspond to any operands that are not used
2232 Therefore, all we need to check is if STMT falls into the
2233 first case, and whether var corresponds to USE. */
/* If the LHS is an SSA name, STMT is a load ("var = array_ref") --
   handled by the elided lines that follow this check.  */
2235 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
2238 operand = TREE_OPERAND (stmt, 1);
2240 if (TREE_CODE (operand) != SSA_NAME)
2250 /* Function vect_is_simple_iv_evolution.
2252 FORNOW: A simple evolution of an induction variables in the loop is
2253 considered a polynomial evolution with constant step. */
2256 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2257 tree * step, bool strict)
2262 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2264 /* When there is no evolution in this loop, the evolution function
2266 if (evolution_part == NULL_TREE)
2269 /* When the evolution is a polynomial of degree >= 2
2270 the evolution function is not "simple". */
/* A chrec-valued evolution part means a nested/non-linear evolution.  */
2271 if (tree_is_chrec (evolution_part))
2274 step_expr = evolution_part;
2275 init_expr = initial_condition (access_fn);
2277 if (vect_debug_details (NULL))
2279 fprintf (dump_file, "step: ");
2280 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2281 fprintf (dump_file, ", init: ");
2282 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* The step must be a compile-time integer constant.  */
2288 if (TREE_CODE (step_expr) != INTEGER_CST)
2290 if (vect_debug_details (NULL))
2291 fprintf (dump_file, "step unknown.");
/* In STRICT mode -- presumably; the guard is elided here -- only a
   unit step is accepted.  */
2296 if (!integer_onep (step_expr))
2298 if (vect_debug_details (NULL))
2299 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2307 /* Function vect_analyze_scalar_cycles.
2309 Examine the cross iteration def-use cycles of scalar variables, by
2310 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
2311 cycles that they represent do not impede vectorization.
2313 FORNOW: Reduction as in the following loop, is not supported yet:
2317 The cross-iteration cycle corresponding to variable 'sum' will be
2318 considered too complicated and will impede vectorization.
2320 FORNOW: Induction as in the following loop, is not supported yet:
2325 However, the following loop *is* vectorizable:
2330 In both loops there exists a def-use cycle for the variable i:
2331 loop: i_2 = PHI (i_0, i_1)
2336 The evolution of the above cycle is considered simple enough,
2337 however, we also check that the cycle does not need to be
2338 vectorized, i.e - we check that the variable that this cycle
2339 defines is only used for array indexing or in stmts that do not
2340 need to be vectorized. This is not the case in loop2, but it
2341 *is* the case in loop3. */
2344 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
2347 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2348 basic_block bb = loop->header;
2351 if (vect_debug_details (NULL))
2352 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
/* Walk all PHIs in the loop header -- each represents a potential
   cross-iteration scalar cycle.  */
2354 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
2356 tree access_fn = NULL;
2358 if (vect_debug_details (NULL))
2360 fprintf (dump_file, "Analyze phi: ");
2361 print_generic_expr (dump_file, phi, TDF_SLIM);
2364 /* Skip virtual phi's. The data dependences that are associated with
2365 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2367 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2369 if (vect_debug_details (NULL))
2370 fprintf (dump_file, "virtual phi. skip.")
2374 /* Analyze the evolution function. */
2376 /* FORNOW: The only scalar cross-iteration cycles that we allow are
2377 those of loop induction variables; This property is verified here.
2379 Furthermore, if that induction variable is used in an operation
2380 that needs to be vectorized (i.e, is not solely used to index
2381 arrays and check the exit condition) - we do not support its
2382 vectorization yet. This property is verified in vect_is_simple_use,
2383 during vect_analyze_operations. */
2385 access_fn = /* instantiate_parameters
2387 analyze_scalar_evolution (loop, PHI_RESULT (phi));
/* No access function at all: an unanalyzable cycle -- reject.  */
2391 if (vect_debug_stats (loop) || vect_debug_details (loop))
2392 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2396 if (vect_debug_details (NULL))
2398 fprintf (dump_file, "Access function of PHI: ");
2399 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The evolution must be a simple IV (constant-step polynomial).  */
2402 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
2405 if (vect_debug_stats (loop) || vect_debug_details (loop))
2406 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2415 /* Function vect_analyze_data_ref_dependence.
2417 Return TRUE if there (might) exist a dependence between a memory-reference
2418 DRA and a memory-reference DRB. */
/* Decide whether a dependence may exist between data-refs DRA and DRB.
   First tries the cheap test (differing base names), then computes the
   affine dependence relation; only a proven "no dependence"
   (chrec_known) lets vectorization proceed.
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
2421 vect_analyze_data_ref_dependence (struct data_reference *dra,
2422 struct data_reference *drb,
2426 struct data_dependence_relation *ddr;
/* If we cannot even tell whether the bases differ, conservatively fail.  */
2428 if (!array_base_name_differ_p (dra, drb, &differ_p))
2430 if (vect_debug_stats (loop) || vect_debug_details (loop))
2433 "not vectorized: can't determine dependence between: ");
2434 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2435 fprintf (dump_file, " and ");
2436 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
/* Compute the full affine dependence relation between the two refs.  */
2444 ddr = initialize_data_dependence_relation (dra, drb);
2445 compute_affine_dependence (ddr);
/* chrec_known means "proved independent"; anything else is a possible
   dependence and blocks vectorization.  */
2447 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2450 if (vect_debug_stats (loop) || vect_debug_details (loop))
2453 "not vectorized: possible dependence between data-refs ");
2454 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2455 fprintf (dump_file, " and ");
2456 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2463 /* Function vect_analyze_data_ref_dependences.
2465 Examine all the data references in the loop, and make sure there do not
2466 exist any data dependences between them.
2468 TODO: dependences which distance is greater than the vectorization factor
/* Check all pairs of data references in the loop for dependences:
   every write against every later write (output deps), and every read
   against every write (true/anti deps).  O(W^2 + R*W) pairwise checks.
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
2472 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2475 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2476 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2477 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2479 /* Examine store-store (output) dependences. */
2481 if (vect_debug_details (NULL))
2482 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2484 if (vect_debug_details (NULL))
2485 fprintf (dump_file, "compare all store-store pairs.");
/* j starts at i + 1: each unordered write pair is checked once.  */
2487 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2489 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2491 struct data_reference *dra =
2492 VARRAY_GENERIC_PTR (loop_write_refs, i);
2493 struct data_reference *drb =
2494 VARRAY_GENERIC_PTR (loop_write_refs, j);
2495 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2500 /* Examine load-store (true/anti) dependences. */
2502 if (vect_debug_details (NULL))
2503 fprintf (dump_file, "compare all load-store pairs.");
2505 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2507 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2509 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2510 struct data_reference *drb =
2511 VARRAY_GENERIC_PTR (loop_write_refs, j);
2512 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2521 /* Function vect_get_first_index.
2523 REF is a data reference.
2524 If it is an ARRAY_REF: if its lower bound is simple enough,
2525 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2526 If it is not an ARRAY_REF: REF has no "first index";
2527 ARRAY_FIRST_INDEX is zero, and the function returns TRUE. */
/* Extract the lower bound of an ARRAY_REF into *ARRAY_FIRST_INDEX.
   Non-ARRAY_REFs get a zero "first index"; a non-constant lower bound
   is rejected (see the function comment above).
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
2530 vect_get_first_index (tree ref, tree *array_first_index)
2534 if (TREE_CODE (ref) != ARRAY_REF)
2535 *array_first_index = size_zero_node;
2538 array_start = array_ref_low_bound (ref);
/* The lower bound must be a simple (host-representable) integer cst.  */
2539 if (!host_integerp (array_start,0))
2541 if (vect_debug_details (NULL))
2543 fprintf (dump_file, "array min val not simple integer cst.");
2544 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2548 *array_first_index = array_start;
2555 /* Function vect_compute_array_base_alignment.
2556 A utility function of vect_compute_array_ref_alignment.
2558 Compute the misalignment of ARRAY in bits.
2561 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
2562 VECTYPE - we are interested in the misalignment modulo the size of vectype.
2563 if NULL: don't compute misalignment, just return the base of ARRAY.
2564 PREV_DIMENSIONS - initialized to one.
2565 MISALIGNMENT - the computed misalignment in bits.
2568 If VECTYPE is not NULL:
2569 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
2570 the base of the array, and put the computed misalignment in MISALIGNMENT.
2572 Return the base of the array.
2574 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
2575 a[idx_N]...[idx_2][idx_1] is
2576 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
2577 ... + idx_N * dim_0 * ... * dim_N-1}.
2578 (The misalignment of &a is not checked here).
2579 Note, that every term contains dim_0, therefore, if dim_0 is a
2580 multiple of NUNITS, the whole sum is a multiple of NUNITS.
2581 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
2582 NUINTS, we can say that the misalignment of the sum is equal to
2583 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
2584 we can't determine this array misalignment, and we return
2586 We proceed recursively in this manner, accumulating total misalignment
2587 and the multiplication of previous dimensions for correct misalignment
/* Recursively accumulate the misalignment (in bits, modulo the vector
   type size) contributed by each dimension of a multidimensional
   ARRAY_REF, as described in the function comment above.  Returns the
   base object (recursing on TREE_OPERAND (array, 0)).
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
2591 vect_compute_array_base_alignment (tree array,
2593 tree *prev_dimensions,
2598 tree dimension_size;
2600 tree bits_per_vectype;
2601 tree bits_per_vectype_unit;
2603 /* The 'stop condition' of the recursion. */
2604 if (TREE_CODE (array) != ARRAY_REF)
2608 /* Just get the base decl. */
2609 return vect_compute_array_base_alignment
2610 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
2612 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
2613 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
/* dimension_size = max - min + 1, i.e. the element count of this
   dimension (computation partially visible below).  */
2616 domain = TYPE_DOMAIN (TREE_TYPE (array));
2618 int_const_binop (PLUS_EXPR,
2619 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
2620 TYPE_MIN_VALUE (domain), 1),
2623 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
2624 is a multiple of NUNITS:
2626 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
2628 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
2629 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
2630 if (integer_zerop (mis))
2631 /* This array is aligned. Continue just in order to get the base decl. */
2632 return vect_compute_array_base_alignment
2633 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
2635 index = TREE_OPERAND (array, 1);
2636 if (!host_integerp (index, 1))
2637 /* The current index is not constant. */
/* Normalize the index relative to the dimension's lower bound.  */
2640 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
2642 bits_per_vectype = fold_convert (unsigned_type_node,
2643 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2644 GET_MODE_SIZE (TYPE_MODE (vectype))));
2645 bits_per_vectype_unit = fold_convert (unsigned_type_node,
2646 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2647 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
2649 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
2653 (*misalignment + index_val * dimension_size * *prev_dimensions)
2657 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
2658 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
2659 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
2660 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
/* Keep the accumulated misalignment reduced modulo the vector size.  */
2661 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
2664 *prev_dimensions = int_const_binop (MULT_EXPR,
2665 *prev_dimensions, dimension_size, 1);
2667 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
2673 /* Function vect_compute_data_ref_alignment
2675 Compute the misalignment of the data reference DR.
2678 1. If during the misalignment computation it is found that the data reference
2679 cannot be vectorized then false is returned.
2680 2. DR_MISALIGNMENT (DR) is defined.
2682 FOR NOW: No analysis is actually performed. Misalignment is calculated
2683 only for trivial cases. TODO. */
/* Compute DR_MISALIGNMENT for data reference DR: find the base and bit
   offset of the access, force the base decl's alignment if needed and
   possible, then record the byte misalignment modulo TYPE_ALIGN of the
   vector type.  DR_MISALIGNMENT stays -1 (unknown) on any failure path.
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
2686 vect_compute_data_ref_alignment (struct data_reference *dr,
2687 loop_vec_info loop_vinfo)
2689 tree stmt = DR_STMT (dr);
2690 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2691 tree ref = DR_REF (dr);
2694 tree offset = size_zero_node;
2695 tree base, bit_offset, alignment;
2696 tree unit_bits = fold_convert (unsigned_type_node,
2697 build_int_cst (NULL_TREE, BITS_PER_UNIT));
2699 bool base_aligned_p;
2701 if (vect_debug_details (NULL))
2702 fprintf (dump_file, "vect_compute_data_ref_alignment:");
2704 /* Initialize misalignment to unknown. */
2705 DR_MISALIGNMENT (dr) = -1;
2707 scalar_type = TREE_TYPE (ref);
2708 vectype = get_vectype_for_scalar_type (scalar_type);
/* No vector type for this scalar type => the ref is not vectorizable.  */
2711 if (vect_debug_details (NULL))
2713 fprintf (dump_file, "no vectype for stmt: ");
2714 print_generic_expr (dump_file, stmt, TDF_SLIM);
2715 fprintf (dump_file, " scalar_type: ");
2716 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
2718 /* It is not possible to vectorize this data reference. */
2721 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
2723 if (TREE_CODE (ref) == ARRAY_REF)
2726 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
/* Resolve the base object and the bit offset of the access from it.  */
2728 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
2729 loop_vinfo, &bit_offset, &base_aligned_p);
2732 if (vect_debug_details (NULL))
2734 fprintf (dump_file, "Unknown alignment for access: ");
2735 print_generic_expr (dump_file,
2736 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
2741 if (!base_aligned_p)
2743 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
2745 if (vect_debug_details (NULL))
2747 fprintf (dump_file, "can't force alignment of ref: ");
2748 print_generic_expr (dump_file, ref, TDF_SLIM);
2753 /* Force the alignment of the decl.
2754 NOTE: This is the only change to the code we make during
2755 the analysis phase, before deciding to vectorize the loop. */
2756 if (vect_debug_details (NULL))
2757 fprintf (dump_file, "force alignment");
2758 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
2759 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
2762 /* At this point we assume that the base is aligned, and the offset from it
2763 (including index, if relevant) has been computed and is in BIT_OFFSET. */
2764 gcc_assert (base_aligned_p
2765 || (TREE_CODE (base) == VAR_DECL
2766 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
2768 /* Convert into bytes. */
2769 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
2770 /* Check that there is no remainder in bits. */
2771 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
/* A sub-byte offset (e.g. a bitfield access) cannot be handled.  */
2772 if (!integer_zerop (bit_offset))
2774 if (vect_debug_details (NULL))
2776 fprintf (dump_file, "bit offset alignment: ");
2777 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
2782 /* Alignment required, in bytes: */
2783 alignment = fold_convert (unsigned_type_node,
2784 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
2786 /* Modulo alignment. */
2787 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
2788 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
2790 if (vect_debug_details (NULL))
2791 fprintf (dump_file, "unexpected misalign value");
2795 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
2797 if (vect_debug_details (NULL))
2798 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
2804 /* Function vect_compute_array_ref_alignment
2806 Compute the alignment of an array-ref.
2807 The alignment we compute here is relative to
2808 TYPE_ALIGN(VECTYPE) boundary.
2811 OFFSET - the alignment in bits
2812 Return value - the base of the array-ref. E.g,
2813 if the array-ref is a.b[k].c[i][j] the returned
/* Compute the alignment (in bits, relative to TYPE_ALIGN (VECTYPE)) of
   an array-ref data reference, combining the base-array misalignment
   with the misalignment contributed by the first accessed index.
   Returns the base of the array-ref (see the comment above).
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
2818 vect_compute_array_ref_alignment (struct data_reference *dr,
2819 loop_vec_info loop_vinfo,
2823 tree array_first_index = size_zero_node;
2825 tree ref = DR_REF (dr);
2826 tree scalar_type = TREE_TYPE (ref);
2827 tree oprnd0 = TREE_OPERAND (ref, 0);
2828 tree dims = size_one_node;
2829 tree misalign = size_zero_node;
2830 tree next_ref, this_offset = size_zero_node;
2834 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
2835 /* The reference is an array without its last index. */
2836 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
2839 vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
2841 /* Alignment is not requested. Just return the base. */
2844 /* Compute alignment. */
2845 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
2847 this_offset = misalign;
2849 /* Check the first index accessed. */
2850 if (!vect_get_first_index (ref, &array_first_index))
2852 if (vect_debug_details (NULL))
2853 fprintf (dump_file, "no first_index for array.");
2857 /* Check the index of the array_ref. */
2858 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
2859 LOOP_VINFO_LOOP (loop_vinfo)->num);
2861 /* FORNOW: In order to simplify the handling of alignment, we make sure
2862 that the first location at which the array is accessed ('init') is on an
2863 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2864 This is too conservative, since we require that
2865 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2866 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2867 This should be relaxed in the future. */
2869 if (!init || !host_integerp (init, 0))
2871 if (vect_debug_details (NULL))
2872 fprintf (dump_file, "non constant init. ");
2876 /* bytes per scalar element: */
2877 nunits = fold_convert (unsigned_type_node,
2878 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
2879 nbits = int_const_binop (MULT_EXPR, nunits,
2880 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
2882 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
2883 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
2884 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
2885 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
2887 /* TODO: allow negative misalign values. */
2888 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
2890 if (vect_debug_details (NULL))
2891 fprintf (dump_file, "unexpected misalign value");
2899 /* Function vect_compute_data_refs_alignment
2901 Compute the misalignment of data references in the loop.
2902 This pass may take place at function granularity instead of at loop
2905 FOR NOW: No analysis is actually performed. Misalignment is calculated
2906 only for trivial cases. TODO. */
/* Compute the misalignment of every data reference in the loop, by
   calling vect_compute_data_ref_alignment on each write and each read.
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
2909 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2911 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2912 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
/* First all stores ...  */
2915 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2917 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2918 vect_compute_data_ref_alignment (dr, loop_vinfo);
/* ... then all loads.  */
2921 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2923 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2924 vect_compute_data_ref_alignment (dr, loop_vinfo);
2929 /* Function vect_enhance_data_refs_alignment
2931 This pass will use loop versioning and loop peeling in order to enhance
2932 the alignment of data references in the loop.
2934 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2935 original loop is to be vectorized; Any other loops that are created by
2936 the transformations performed in this pass - are not supposed to be
2937 vectorized. This restriction will be relaxed.
2939 FOR NOW: No transformation is actually performed. TODO. */
/* Placeholder for the alignment-enhancement transformation (peeling /
   versioning).  FOR NOW it performs no transformation; the body is a
   design comment sketching the intended cost-model-driven approach.  */
2942 vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
2945 This pass will require a cost model to guide it whether to apply peeling
2946 or versioning or a combination of the two. For example, the scheme that
2947 intel uses when given a loop with several memory accesses, is as follows:
2948 choose one memory access ('p') which alignment you want to force by doing
2949 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2950 other accesses are not necessarily aligned, or (2) use loop versioning to
2951 generate one loop in which all accesses are aligned, and another loop in
2952 which only 'p' is necessarily aligned.
2954 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2955 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
2956 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2958 Devising a cost model is the most critical aspect of this work. It will
2959 guide us on which access to peel for, whether to use loop versioning, how
2960 many versions to create, etc. The cost model will probably consist of
2961 generic considerations as well as target specific considerations (on
2962 powerpc for example, misaligned stores are more painful than misaligned
2965 Here is the general steps involved in alignment enhancements:
2967 -- original loop, before alignment analysis:
2968 for (i=0; i<N; i++){
2969 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2970 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2973 -- After vect_compute_data_refs_alignment:
2974 for (i=0; i<N; i++){
2975 x = q[i]; # DR_MISALIGNMENT(q) = 3
2976 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2979 -- Possibility 1: we do loop versioning:
2981 for (i=0; i<N; i++){ # loop 1A
2982 x = q[i]; # DR_MISALIGNMENT(q) = 3
2983 p[i] = y; # DR_MISALIGNMENT(p) = 0
2987 for (i=0; i<N; i++){ # loop 1B
2988 x = q[i]; # DR_MISALIGNMENT(q) = 3
2989 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2993 -- Possibility 2: we do loop peeling:
2994 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2998 for (i = 3; i < N; i++){ # loop 2A
2999 x = q[i]; # DR_MISALIGNMENT(q) = 0
3000 p[i] = y; # DR_MISALIGNMENT(p) = unknown
3003 -- Possibility 3: combination of loop peeling and versioning:
3004 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
3009 for (i = 3; i<N; i++){ # loop 3A
3010 x = q[i]; # DR_MISALIGNMENT(q) = 0
3011 p[i] = y; # DR_MISALIGNMENT(p) = 0
3015 for (i = 3; i<N; i++){ # loop 3B
3016 x = q[i]; # DR_MISALIGNMENT(q) = 0
3017 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
3021 These loops are later passed to loop_transform to be vectorized. The
3022 vectorizer will use the alignment information to guide the transformation
3023 (whether to generate regular loads/stores, or with special handling for
3029 /* Function vect_analyze_data_refs_alignment
3031 Analyze the alignment of the data-references in the loop.
3032 FOR NOW: Until support for misaligned accesses is in place, only if all
3033 accesses are aligned can the loop be vectorized. This restriction will be
/* Driver for alignment analysis: compute misalignments, attempt
   enhancement, then reject the loop if any store is misaligned.
   Misaligned loads are tolerated (see the comment below).
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  The loop over loop_read_datarefs appears to remain
   even though its declaration is commented out at line 3040 — likely
   dead/disabled code in this revision; verify against the full file.  */
3037 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
3039 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3040 /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
3044 if (vect_debug_details (NULL))
3045 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
3048 /* This pass may take place at function granularity instead of at loop
3051 vect_compute_data_refs_alignment (loop_vinfo);
3054 /* This pass will use loop versioning and loop peeling in order to enhance
3055 the alignment of data references in the loop.
3056 FOR NOW: we assume that whatever versioning/peeling took place, the
3057 original loop is to be vectorized. Any other loops that were created by
3058 the transformations performed in this pass - are not supposed to be
3059 vectorized. This restriction will be relaxed. */
3061 vect_enhance_data_refs_alignment (loop_vinfo);
3064 /* Finally, check that loop can be vectorized.
3065 FOR NOW: Until support for misaligned accesses is in place, only if all
3066 accesses are aligned can the loop be vectorized. This restriction will be
3069 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3071 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3072 if (!aligned_access_p (dr))
3074 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3075 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3076 fprintf (dump_file, "not vectorized: unaligned store.");
3081 /* The vectorizer now supports misaligned loads, so we don't fail anymore
3082 in the presence of a misaligned read dataref. For some targets however
3083 it may be preferable not to vectorize in such a case as misaligned
3084 accesses are very costly. This should be considered in the future. */
3086 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3088 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3089 if (!aligned_access_p (dr))
3091 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3092 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3093 fprintf (dump_file, "not vectorized: unaligned load.");
3103 /* Function vect_analyze_data_ref_access.
3105 Analyze the access pattern of the data-reference DR. For now, a data access
3106 has to be consecutive and aligned to be considered vectorizable. */
/* Verify that the access pattern of DR is vectorizable: all outer
   dimensions of a multidimensional ref must be loop invariant, and the
   innermost access function must be constant or a simple induction.
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
3109 vect_analyze_data_ref_access (struct data_reference *dr)
3111 varray_type access_fns = DR_ACCESS_FNS (dr);
3114 unsigned int dimensions, i;
3116 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
3117 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
3118 access is contiguous). */
3119 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
3121 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
3123 access_fn = DR_ACCESS_FN (dr, i);
/* A non-null evolution part means this outer index varies in the loop,
   so the access is not contiguous.  */
3125 if (evolution_part_in_loop_num (access_fn,
3126 loop_containing_stmt (DR_STMT (dr))->num))
3128 /* Evolution part is not NULL in this loop (it is neither constant nor
3130 if (vect_debug_details (NULL))
3133 "not vectorized: complicated multidimensional array access.");
3134 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3140 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
3141 if (!evolution_function_is_constant_p (access_fn)
3142 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
3143 access_fn, &init, &step, true))
3145 if (vect_debug_details (NULL))
3147 fprintf (dump_file, "not vectorized: too complicated access function.");
3148 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3157 /* Function vect_analyze_data_ref_accesses.
3159 Analyze the access pattern of all the data references in the loop.
3161 FORNOW: the only access pattern that is considered vectorizable is a
3162 simple step 1 (consecutive) access.
3164 FORNOW: handle only arrays and pointer accesses. */
/* Check the access pattern of every data-ref in the loop (writes, then
   reads) via vect_analyze_data_ref_access; any failure rejects the loop.
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
3167 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
3170 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3171 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3173 if (vect_debug_details (NULL))
3174 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
3176 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3178 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3179 bool ok = vect_analyze_data_ref_access (dr);
3182 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3183 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3184 fprintf (dump_file, "not vectorized: complicated access pattern.");
3189 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3191 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3192 bool ok = vect_analyze_data_ref_access (dr);
3195 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3196 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3197 fprintf (dump_file, "not vectorized: complicated access pattern.");
3206 /* Function vect_analyze_pointer_ref_access.
3209 STMT - a stmt that contains a data-ref
3210 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
3212 If the data-ref access is vectorizable, return a data_reference structure
3213 that represents it (DR). Otherwise - return NULL. */
/* Analyze an INDIRECT_REF access MEMREF in STMT: the pointer must have
   a simple linear evolution whose constant step equals the element size
   (consecutive access).  On success, builds and returns a data_reference
   with a canonical {0, +, 1} index access function; otherwise NULL.
   NOTE(review): extract is missing intervening source lines; code left
   byte-identical.  */
3215 static struct data_reference *
3216 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
3218 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3219 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
3220 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
3223 tree reftype, innertype;
3224 enum machine_mode innermode;
3225 tree indx_access_fn;
3226 int loopnum = loop->num;
3227 struct data_reference *dr;
3231 if (vect_debug_stats (loop) || vect_debug_details (loop))
3232 fprintf (dump_file, "not vectorized: complicated pointer access.");
3236 if (vect_debug_details (NULL))
3238 fprintf (dump_file, "Access function of ptr: ");
3239 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3242 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
3244 if (vect_debug_stats (loop) || vect_debug_details (loop))
3245 fprintf (dump_file, "not vectorized: pointer access is not simple.");
/* The step must be a compile-time constant.  */
3251 if (!host_integerp (step,0))
3253 if (vect_debug_stats (loop) || vect_debug_details (loop))
3255 "not vectorized: non constant step for pointer access.");
3259 step_val = TREE_INT_CST_LOW (step);
3261 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
3262 if (TREE_CODE (reftype) != POINTER_TYPE)
3264 if (vect_debug_stats (loop) || vect_debug_details (loop))
3265 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
3269 reftype = TREE_TYPE (init);
3270 if (TREE_CODE (reftype) != POINTER_TYPE)
3272 if (vect_debug_stats (loop) || vect_debug_details (loop))
3273 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* Consecutive access requires the step to equal the element size.  */
3277 innertype = TREE_TYPE (reftype);
3278 innermode = TYPE_MODE (innertype);
3279 if (GET_MODE_SIZE (innermode) != step_val)
3281 /* FORNOW: support only consecutive access */
3282 if (vect_debug_stats (loop) || vect_debug_details (loop))
3283 fprintf (dump_file, "not vectorized: non consecutive access.");
/* Represent the access with the canonical index evolution {0, +, 1}.  */
3288 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
3289 if (vect_debug_details (NULL))
3291 fprintf (dump_file, "Access function of ptr indx: ");
3292 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
3294 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
3299 /* Function vect_get_symbl_and_dr.
3301 The function returns SYMBL - the relevant variable for
3302 memory tag (for aliasing purposes).
3303 Also data reference structure DR is created.
3306 MEMREF - data reference in STMT
3307 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
3310 DR - data_reference struct for MEMREF
3311 return value - the relevant variable for memory tag (for aliasing purposes).
/* For data-ref MEMREF in STMT, build its data_reference (*DR) and
   return the variable relevant for aliasing (memory-tag) purposes.
   Handles INDIRECT_REF (via vect_analyze_pointer_ref_access) and
   ARRAY_REF (via analyze_array); other forms are rejected.
   NOTE(review): extract is missing intervening source lines (case
   labels, braces, returns); code left byte-identical.  */
3316 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
3317 loop_vec_info loop_vinfo, struct data_reference **dr)
3319 tree symbl, oprnd0, oprnd1;
3320 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3322 tree array_base, base;
3323 struct data_reference *new_dr;
3324 bool base_aligned_p;
3327 switch (TREE_CODE (memref))
/* INDIRECT_REF case (presumably — the case label is among the missing
   lines): analyze the pointer access and classify its base.  */
3330 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
3334 symbl = DR_BASE_NAME (new_dr);
3335 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
3337 switch (TREE_CODE (symbl))
3341 oprnd0 = TREE_OPERAND (symbl, 0);
3342 oprnd1 = TREE_OPERAND (symbl, 1);
3345 /* Only {address_base + offset} expressions are supported,
3346 where address_base can be POINTER_TYPE or ARRAY_TYPE and
3347 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
3348 TODO: swap operands if {offset + address_base}. */
3349 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
3350 && TREE_CODE (oprnd1) != INTEGER_CST)
3351 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
3354 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
/* Recurse on the address base to find the underlying symbol.  */
3357 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
3358 loop_vinfo, &new_dr);
3362 /* symbl remains unchanged. */
3366 if (vect_debug_details (NULL))
3368 fprintf (dump_file, "unhandled data ref: ");
3369 print_generic_expr (dump_file, memref, TDF_SLIM);
3370 fprintf (dump_file, " (symbl ");
3371 print_generic_expr (dump_file, symbl, TDF_SLIM);
3372 fprintf (dump_file, ") in stmt ");
3373 print_generic_expr (dump_file, stmt, TDF_SLIM);
3380 offset = size_zero_node;
3382 /* Store the array base in the stmt info.
3383 For one dimensional array ref a[i], the base is a,
3384 for multidimensional a[i1][i2]..[iN], the base is
3385 a[i1][i2]..[iN-1]. */
3386 array_base = TREE_OPERAND (memref, 0);
3387 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
3389 new_dr = analyze_array (stmt, memref, is_read);
3392 /* Find the relevant symbol for aliasing purposes. */
3393 base = DR_BASE_NAME (new_dr);
3394 switch (TREE_CODE (base))
3401 symbl = TREE_OPERAND (base, 0);
3405 /* Could have recorded more accurate information -
3406 i.e, the actual FIELD_DECL that is being referenced -
3407 but later passes expect VAR_DECL as the nmt. */
3408 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
3409 loop_vinfo, &offset, &base_aligned_p);
3414 if (vect_debug_details (NULL))
3416 fprintf (dump_file, "unhandled struct/class field access ");
3417 print_generic_expr (dump_file, stmt, TDF_SLIM);
3424 if (vect_debug_details (NULL))
3426 fprintf (dump_file, "unhandled data ref: ");
3427 print_generic_expr (dump_file, memref, TDF_SLIM);
3428 fprintf (dump_file, " in stmt ");
3429 print_generic_expr (dump_file, stmt, TDF_SLIM);
3437 /* Function vect_analyze_data_refs.
3439 Find all the data references in the loop.
3441 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
3442 which base is really an array (not a pointer) and which alignment
3443 can be forced. This restriction will be relaxed. */
/* Scan every stmt of the loop for data references (identified by their
   virtual operands), build a data_reference for each supported form,
   record its memory tag in the stmt_vec_info, and push it onto the
   loop's read or write dataref varray.
   NOTE(review): extract is missing intervening source lines (case
   labels, braces, returns); code left byte-identical.  */
3446 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3448 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3449 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3450 int nbbs = loop->num_nodes;
3451 block_stmt_iterator si;
3453 struct data_reference *dr;
3456 bool base_aligned_p;
3459 if (vect_debug_details (NULL))
3460 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
3462 for (j = 0; j < nbbs; j++)
3464 basic_block bb = bbs[j];
3465 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3467 bool is_read = false;
3468 tree stmt = bsi_stmt (si);
3469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3470 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3471 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3472 vuse_optype vuses = STMT_VUSE_OPS (stmt);
3473 varray_type *datarefs = NULL;
3474 int nvuses, nv_may_defs, nv_must_defs;
3478 /* Assumption: there exists a data-ref in stmt, if and only if
3479 it has vuses/vdefs. */
3481 if (!vuses && !v_may_defs && !v_must_defs)
3484 nvuses = NUM_VUSES (vuses);
3485 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
3486 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A stmt with both vuses and vdefs (read and write) is unexpected.  */
3488 if (nvuses && (nv_may_defs || nv_must_defs))
3490 if (vect_debug_details (NULL))
3492 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
3493 print_generic_expr (dump_file, stmt, TDF_SLIM);
3498 if (TREE_CODE (stmt) != MODIFY_EXPR)
3500 if (vect_debug_details (NULL))
3502 fprintf (dump_file, "unexpected vops in stmt: ");
3503 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Reads come from the RHS, writes go to the LHS of the MODIFY_EXPR.  */
3510 memref = TREE_OPERAND (stmt, 1);
3511 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
3516 memref = TREE_OPERAND (stmt, 0);
3517 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
3521 /* Analyze MEMREF. If it is of a supported form, build data_reference
3522 struct for it (DR) and find the relevant symbol for aliasing
3524 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
3527 if (vect_debug_stats (loop) || vect_debug_details (loop))
3529 fprintf (dump_file, "not vectorized: unhandled data ref: ");
3530 print_generic_expr (dump_file, stmt, TDF_SLIM);
3535 /* Find and record the memtag assigned to this data-ref. */
3536 switch (TREE_CODE (symbl))
3539 STMT_VINFO_MEMTAG (stmt_info) = symbl;
/* SSA_NAME case (presumably — case label among the missing lines):
   look up the type memory tag of the underlying variable.  */
3543 symbl = SSA_NAME_VAR (symbl);
3544 tag = get_var_ann (symbl)->type_mem_tag;
3547 tree ptr = TREE_OPERAND (memref, 0);
3548 if (TREE_CODE (ptr) == SSA_NAME)
3549 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
3553 if (vect_debug_stats (loop) || vect_debug_details (loop))
3554 fprintf (dump_file, "not vectorized: no memtag for ref.");
3557 STMT_VINFO_MEMTAG (stmt_info) = tag;
3561 address_base = TREE_OPERAND (symbl, 0);
3563 switch (TREE_CODE (address_base))
3566 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
3567 STMT_VINFO_MEMTAG (stmt_info) =
3568 vect_get_base_and_bit_offset (dr, DR_BASE_NAME (dr), NULL_TREE,
3569 loop_vinfo, &offset,
3574 STMT_VINFO_MEMTAG (stmt_info) = address_base;
3578 if (vect_debug_stats (loop) || vect_debug_details (loop))
3580 fprintf (dump_file, "not vectorized: unhandled address expression: ");
3581 print_generic_expr (dump_file, stmt, TDF_SLIM);
3588 if (vect_debug_stats (loop) || vect_debug_details (loop))
3590 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
3591 print_generic_expr (dump_file, memref, TDF_SLIM);
/* Record the successfully-analyzed data-ref.  */
3596 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
3597 STMT_VINFO_DATA_REF (stmt_info) = dr;
3605 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3607 /* Function vect_mark_relevant.
3609 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3612 vect_mark_relevant (varray_type worklist, tree stmt)
3614 stmt_vec_info stmt_info;
3616 if (vect_debug_details (NULL))
3617 fprintf (dump_file, "mark relevant.");
3619 if (TREE_CODE (stmt) == PHI_NODE)
3621 VARRAY_PUSH_TREE (worklist, stmt);
3625 stmt_info = vinfo_for_stmt (stmt);
3629 if (vect_debug_details (NULL))
3631 fprintf (dump_file, "mark relevant: no stmt info!!.");
3632 print_generic_expr (dump_file, stmt, TDF_SLIM);
3637 if (STMT_VINFO_RELEVANT_P (stmt_info))
3639 if (vect_debug_details (NULL))
3640 fprintf (dump_file, "already marked relevant.");
3644 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
3645 VARRAY_PUSH_TREE (worklist, stmt);
3649 /* Function vect_stmt_relevant_p.
3651 Return true if STMT in loop that is represented by LOOP_VINFO is
3652 "relevant for vectorization".
3654 A stmt is considered "relevant for vectorization" if:
3655 - it has uses outside the loop.
3656 - it has vdefs (it alters memory).
3657 - control stmts in the loop (except for the exit condition).
3659 CHECKME: what other side effects would the vectorizer allow? */
3662 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
3664 v_may_def_optype v_may_defs;
3665 v_must_def_optype v_must_defs;
3666 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3671 /* cond stmt other than loop exit cond. */
3672 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
3675 /* changing memory. */
3676 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3677 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3678 if (v_may_defs || v_must_defs)
3680 if (vect_debug_details (NULL))
3681 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
3685 /* uses outside the loop. */
3686 df = get_immediate_uses (stmt);
3687 num_uses = num_immediate_uses (df);
3688 for (i = 0; i < num_uses; i++)
3690 tree use = immediate_use (df, i);
3691 basic_block bb = bb_for_stmt (use);
3692 if (!flow_bb_inside_loop_p (loop, bb))
3694 if (vect_debug_details (NULL))
3695 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
3704 /* Function vect_mark_stmts_to_be_vectorized.
3706 Not all stmts in the loop need to be vectorized. For example:
3715 Stmt 1 and 3 do not need to be vectorized, because loop control and
3716 addressing of vectorized data-refs are handled differently.
3718 This pass detects such stmts. */
3721 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3723 varray_type worklist;
3724 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3725 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3726 unsigned int nbbs = loop->num_nodes;
3727 block_stmt_iterator si;
3733 stmt_vec_info stmt_info;
3735 if (vect_debug_details (NULL))
3736 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
3738 VARRAY_TREE_INIT (worklist, 64, "work list");
3740 /* 1. Init worklist. */
3742 for (i = 0; i < nbbs; i++)
3744 basic_block bb = bbs[i];
3745 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3747 stmt = bsi_stmt (si);
3749 if (vect_debug_details (NULL))
3751 fprintf (dump_file, "init: stmt relevant? ");
3752 print_generic_expr (dump_file, stmt, TDF_SLIM);
3755 stmt_info = vinfo_for_stmt (stmt);
3756 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
3758 if (vect_stmt_relevant_p (stmt, loop_vinfo))
3759 vect_mark_relevant (worklist, stmt);
3764 /* 2. Process_worklist */
3766 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
3768 stmt = VARRAY_TOP_TREE (worklist);
3769 VARRAY_POP (worklist);
3771 if (vect_debug_details (NULL))
3773 fprintf (dump_file, "worklist: examine stmt: ");
3774 print_generic_expr (dump_file, stmt, TDF_SLIM);
3777 /* Examine the USES in this statement. Mark all the statements which
3778 feed this statement's uses as "relevant", unless the USE is used as
3781 if (TREE_CODE (stmt) == PHI_NODE)
3783 /* follow the def-use chain inside the loop. */
3784 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
3786 tree arg = PHI_ARG_DEF (stmt, j);
3787 tree def_stmt = NULL_TREE;
3789 if (!vect_is_simple_use (arg, loop, &def_stmt))
3791 if (vect_debug_details (NULL))
3792 fprintf (dump_file, "worklist: unsupported use.");
3793 varray_clear (worklist);
3799 if (vect_debug_details (NULL))
3801 fprintf (dump_file, "worklist: def_stmt: ");
3802 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3805 bb = bb_for_stmt (def_stmt);
3806 if (flow_bb_inside_loop_p (loop, bb))
3807 vect_mark_relevant (worklist, def_stmt);
3811 ann = stmt_ann (stmt);
3812 use_ops = USE_OPS (ann);
3814 for (i = 0; i < NUM_USES (use_ops); i++)
3816 tree use = USE_OP (use_ops, i);
3818 /* We are only interested in uses that need to be vectorized. Uses
3819 that are used for address computation are not considered relevant.
3821 if (exist_non_indexing_operands_for_use_p (use, stmt))
3823 tree def_stmt = NULL_TREE;
3825 if (!vect_is_simple_use (use, loop, &def_stmt))
3827 if (vect_debug_details (NULL))
3828 fprintf (dump_file, "worklist: unsupported use.");
3829 varray_clear (worklist);
3836 if (vect_debug_details (NULL))
3838 fprintf (dump_file, "worklist: examine use %d: ", i);
3839 print_generic_expr (dump_file, use, TDF_SLIM);
3842 bb = bb_for_stmt (def_stmt);
3843 if (flow_bb_inside_loop_p (loop, bb))
3844 vect_mark_relevant (worklist, def_stmt);
3847 } /* while worklist */
3849 varray_clear (worklist);
3854 /* Function vect_get_loop_niters.
3856 Determine how many iterations the loop is executed. */
3859 vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3863 if (vect_debug_details (NULL))
3864 fprintf (dump_file, "\n<<get_loop_niters>>\n");
3866 niters = number_of_iterations_in_loop (loop);
3868 if (niters != NULL_TREE
3869 && niters != chrec_dont_know
3870 && host_integerp (niters,0))
3872 *number_of_iterations = TREE_INT_CST_LOW (niters);
3874 if (vect_debug_details (NULL))
3875 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3876 *number_of_iterations);
3879 return get_loop_exit_condition (loop);
3883 /* Function vect_analyze_loop_form.
3885 Verify the following restrictions (some may be relaxed in the future):
3886 - it's an inner-most loop
3887 - number of BBs = 2 (which are the loop header and the latch)
3888 - the loop has a pre-header
3889 - the loop has a single entry and exit
3890 - the loop exit condition is simple enough, and the number of iterations
3891 can be analyzed (a countable loop). */
3893 static loop_vec_info
3894 vect_analyze_loop_form (struct loop *loop)
3896 loop_vec_info loop_vinfo;
3898 HOST_WIDE_INT number_of_iterations = -1;
3900 if (vect_debug_details (loop))
3901 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
3904 || !loop->single_exit
3905 || loop->num_nodes != 2)
3907 if (vect_debug_stats (loop) || vect_debug_details (loop))
3909 fprintf (dump_file, "not vectorized: bad loop form. ");
3911 fprintf (dump_file, "nested loop.");
3912 else if (!loop->single_exit)
3913 fprintf (dump_file, "multiple exits.");
3914 else if (loop->num_nodes != 2)
3915 fprintf (dump_file, "too many BBs in loop.");
3921 /* We assume that the loop exit condition is at the end of the loop. i.e,
3922 that the loop is represented as a do-while (with a proper if-guard
3923 before the loop if needed), where the loop header contains all the
3924 executable statements, and the latch is empty. */
3925 if (!empty_block_p (loop->latch))
3927 if (vect_debug_stats (loop) || vect_debug_details (loop))
3928 fprintf (dump_file, "not vectorized: unexpectd loop form.");
3932 if (empty_block_p (loop->header))
3934 if (vect_debug_stats (loop) || vect_debug_details (loop))
3935 fprintf (dump_file, "not vectorized: empty loop.");
3939 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3942 if (vect_debug_stats (loop) || vect_debug_details (loop))
3943 fprintf (dump_file, "not vectorized: complicated exit condition.");
3947 if (number_of_iterations < 0)
3949 if (vect_debug_stats (loop) || vect_debug_details (loop))
3950 fprintf (dump_file, "not vectorized: unknown loop bound.");
3954 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3956 if (vect_debug_stats (loop) || vect_debug_details (loop))
3957 fprintf (dump_file, "not vectorized: number of iterations = 0.");
3961 loop_vinfo = new_loop_vec_info (loop);
3962 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3963 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3969 /* Function vect_analyze_loop.
3971 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3972 for it. The different analyses will record information in the
3973 loop_vec_info struct. */
3975 static loop_vec_info
3976 vect_analyze_loop (struct loop *loop)
3979 loop_vec_info loop_vinfo;
3981 if (vect_debug_details (NULL))
3982 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3984 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3986 loop_vinfo = vect_analyze_loop_form (loop);
3989 if (vect_debug_details (loop))
3990 fprintf (dump_file, "bad loop form.");
3994 /* Find all data references in the loop (which correspond to vdefs/vuses)
3995 and analyze their evolution in the loop.
3997 FORNOW: Handle only simple, array references, which
3998 alignment can be forced, and aligned pointer-references. */
4000 ok = vect_analyze_data_refs (loop_vinfo);
4003 if (vect_debug_details (loop))
4004 fprintf (dump_file, "bad data references.");
4005 destroy_loop_vec_info (loop_vinfo);
4009 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
4011 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
4014 if (vect_debug_details (loop))
4015 fprintf (dump_file, "unexpected pattern.");
4016 if (vect_debug_details (loop))
4017 fprintf (dump_file, "not vectorized: unexpected pattern.");
4018 destroy_loop_vec_info (loop_vinfo);
4022 /* Check that all cross-iteration scalar data-flow cycles are OK.
4023 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4025 ok = vect_analyze_scalar_cycles (loop_vinfo);
4028 if (vect_debug_details (loop))
4029 fprintf (dump_file, "bad scalar cycle.");
4030 destroy_loop_vec_info (loop_vinfo);
4034 /* Analyze data dependences between the data-refs in the loop.
4035 FORNOW: fail at the first data dependence that we encounter. */
4037 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4040 if (vect_debug_details (loop))
4041 fprintf (dump_file, "bad data dependence.");
4042 destroy_loop_vec_info (loop_vinfo);
4046 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4047 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4049 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4052 if (vect_debug_details (loop))
4053 fprintf (dump_file, "bad data access.");
4054 destroy_loop_vec_info (loop_vinfo);
4058 /* Analyze the alignment of the data-refs in the loop.
4059 FORNOW: Only aligned accesses are handled. */
4061 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4064 if (vect_debug_details (loop))
4065 fprintf (dump_file, "bad data alignment.");
4066 destroy_loop_vec_info (loop_vinfo);
4070 /* Scan all the operations in the loop and make sure they are
4073 ok = vect_analyze_operations (loop_vinfo);
4076 if (vect_debug_details (loop))
4077 fprintf (dump_file, "bad operation or unsupported loop bound.");
4078 destroy_loop_vec_info (loop_vinfo);
4082 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4088 /* Function need_imm_uses_for.
4090 Return whether we ought to include information for 'var'
4091 when calculating immediate uses. For this pass we only want use
4092 information for non-virtual variables. */
4095 need_imm_uses_for (tree var)
4097 return is_gimple_reg (var);
4101 /* Function vectorize_loops.
4103 Entry Point to loop vectorization phase. */
4106 vectorize_loops (struct loops *loops)
4108 unsigned int i, loops_num;
4109 unsigned int num_vectorized_loops = 0;
4111 /* Does the target support SIMD? */
4112 /* FORNOW: until more sophisticated machine modelling is in place. */
4113 if (!UNITS_PER_SIMD_WORD)
4115 if (vect_debug_details (NULL))
4116 fprintf (dump_file, "vectorizer: target vector size is not defined.");
4120 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
4122 /* ----------- Analyze loops. ----------- */
4124 /* If some loop was duplicated, it gets bigger number
4125 than all previously defined loops. This fact allows us to run
4126 only over initial loops skipping newly generated ones. */
4127 loops_num = loops->num;
4128 for (i = 1; i < loops_num; i++)
4130 loop_vec_info loop_vinfo;
4131 struct loop *loop = loops->parray[i];
4136 loop_vinfo = vect_analyze_loop (loop);
4137 loop->aux = loop_vinfo;
4139 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
4142 vect_transform_loop (loop_vinfo, loops);
4143 num_vectorized_loops++;
4146 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
4147 fprintf (dump_file, "\nvectorized %u loops in function.\n",
4148 num_vectorized_loops);
4150 /* ----------- Finalize. ----------- */
4153 for (i = 1; i < loops_num; i++)
4155 struct loop *loop = loops->parray[i];
4156 loop_vec_info loop_vinfo;
4160 loop_vinfo = loop->aux;
4161 destroy_loop_vec_info (loop_vinfo);
4165 rewrite_into_ssa (false);
4166 if (bitmap_first_set_bit (vars_to_rename) >= 0)
4168 /* The rewrite of ssa names may cause violation of loop closed ssa
4169 form invariants. TODO -- avoid these rewrites completely.
4170 Information in virtual phi nodes is sufficient for it. */
4171 rewrite_into_loop_closed_ssa ();
4173 bitmap_clear (vars_to_rename);