2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
22 /* Loop Vectorization Pass.
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations whose vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
29 For example, the vectorizer transforms the following simple loop:
31 short a[N]; short b[N]; short c[N]; int i;
37 as if it was manually vectorized by rewriting the source code into:
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
44 for (i=0; i<N/8; i++){
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
60 vectorizer currently supports are ARRAY_REFS whose base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It inserts the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
86 For example, say stmt S1 was vectorized into stmt VS1:
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
112 Since we only vectorize operations whose vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
125 #include "coretypes.h"
133 #include "basic-block.h"
134 #include "diagnostic.h"
135 #include "tree-flow.h"
136 #include "tree-dump.h"
139 #include "cfglayout.h"
142 #include "tree-chrec.h"
143 #include "tree-data-ref.h"
144 #include "tree-scalar-evolution.h"
145 #include "tree-vectorizer.h"
146 #include "tree-pass.h"
148 /* Main analysis functions. */
149 static loop_vec_info vect_analyze_loop (struct loop *);
150 static loop_vec_info vect_analyze_loop_form (struct loop *);
151 static bool vect_analyze_data_refs (loop_vec_info);
152 static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
153 static bool vect_analyze_scalar_cycles (loop_vec_info);
154 static bool vect_analyze_data_ref_accesses (loop_vec_info);
155 static bool vect_analyze_data_refs_alignment (loop_vec_info);
156 static void vect_compute_data_refs_alignment (loop_vec_info);
157 static bool vect_analyze_operations (loop_vec_info);
159 /* Main code transformation functions. */
160 static void vect_transform_loop (loop_vec_info, struct loops *);
161 static void vect_transform_loop_bound (loop_vec_info);
162 static bool vect_transform_stmt (tree, block_stmt_iterator *);
163 static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
164 static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
165 static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
166 static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
167 static void vect_align_data_ref (tree);
168 static void vect_enhance_data_refs_alignment (loop_vec_info);
170 /* Utility functions for the analyses. */
171 static bool vect_is_simple_use (tree , struct loop *, tree *);
172 static bool exist_non_indexing_operands_for_use_p (tree, tree);
173 static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
174 static void vect_mark_relevant (varray_type, tree);
175 static bool vect_stmt_relevant_p (tree, loop_vec_info);
176 static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
177 static bool vect_compute_data_ref_alignment
178 (struct data_reference *, loop_vec_info);
179 static bool vect_analyze_data_ref_access (struct data_reference *);
180 static bool vect_get_first_index (tree, tree *);
181 static bool vect_can_force_dr_alignment_p (tree, unsigned int);
182 static struct data_reference * vect_analyze_pointer_ref_access
184 static tree vect_get_base_and_bit_offset
185 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
186 static struct data_reference * vect_analyze_pointer_ref_access
188 static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
189 static tree vect_compute_array_ref_alignment
190 (struct data_reference *, loop_vec_info, tree, tree *);
191 static tree vect_get_ptr_offset (tree, tree, tree *);
192 static tree vect_get_symbl_and_dr
193 (tree, tree, bool, loop_vec_info, struct data_reference **);
195 /* Utility functions for the code transformation. */
196 static tree vect_create_destination_var (tree, tree);
197 static tree vect_create_data_ref_ptr
198 (tree, block_stmt_iterator *, tree, tree *, bool);
199 static tree vect_create_index_for_vector_ref
200 (struct loop *, block_stmt_iterator *);
201 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
202 static tree get_vectype_for_scalar_type (tree);
203 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
204 static tree vect_get_vec_def_for_operand (tree, tree);
205 static tree vect_init_vector (tree, tree);
206 static void vect_finish_stmt_generation
207 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
209 /* Utilities for creation and deletion of vec_info structs. */
210 loop_vec_info new_loop_vec_info (struct loop *loop);
211 void destroy_loop_vec_info (loop_vec_info);
212 stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
214 static bool vect_debug_stats (struct loop *loop);
215 static bool vect_debug_details (struct loop *loop);
218 /* Function new_stmt_vec_info.
220 Create and initialize a new stmt_vec_info struct for STMT. */
223 new_stmt_vec_info (tree stmt, struct loop *loop)
226 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
228 STMT_VINFO_TYPE (res) = undef_vec_info_type;
229 STMT_VINFO_STMT (res) = stmt;
230 STMT_VINFO_LOOP (res) = loop;
231 STMT_VINFO_RELEVANT_P (res) = 0;
232 STMT_VINFO_VECTYPE (res) = NULL;
233 STMT_VINFO_VEC_STMT (res) = NULL;
234 STMT_VINFO_DATA_REF (res) = NULL;
235 STMT_VINFO_MEMTAG (res) = NULL;
236 STMT_VINFO_VECT_DR_BASE (res) = NULL;
242 /* Function new_loop_vec_info.
244 Create and initialize a new loop_vec_info struct for LOOP, as well as
245 stmt_vec_info structs for all the stmts in LOOP. */
248 new_loop_vec_info (struct loop *loop)
252 block_stmt_iterator si;
255 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
257 bbs = get_loop_body (loop);
259 /* Create stmt_info for all stmts in the loop. */
260 for (i = 0; i < loop->num_nodes; i++)
262 basic_block bb = bbs[i];
263 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
265 tree stmt = bsi_stmt (si);
268 get_stmt_operands (stmt);
269 ann = stmt_ann (stmt);
270 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
274 LOOP_VINFO_LOOP (res) = loop;
275 LOOP_VINFO_BBS (res) = bbs;
276 LOOP_VINFO_EXIT_COND (res) = NULL;
277 LOOP_VINFO_NITERS (res) = -1;
278 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
279 LOOP_VINFO_VECT_FACTOR (res) = 0;
280 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
281 "loop_write_datarefs");
282 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
283 "loop_read_datarefs");
288 /* Function destroy_loop_vec_info.
290 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
291 stmts in the loop. */
294 destroy_loop_vec_info (loop_vec_info loop_vinfo)
299 block_stmt_iterator si;
305 loop = LOOP_VINFO_LOOP (loop_vinfo);
307 bbs = LOOP_VINFO_BBS (loop_vinfo);
308 nbbs = loop->num_nodes;
310 for (j = 0; j < nbbs; j++)
312 basic_block bb = bbs[j];
313 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
315 tree stmt = bsi_stmt (si);
316 stmt_ann_t ann = stmt_ann (stmt);
317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
319 set_stmt_info (ann, NULL);
323 free (LOOP_VINFO_BBS (loop_vinfo));
324 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
325 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
331 /* Function debug_loop_stats.
333 For vectorization statistics dumps. */
336 vect_debug_stats (struct loop *loop)
339 block_stmt_iterator si;
340 tree node = NULL_TREE;
342 if (!dump_file || !(dump_flags & TDF_STATS))
347 fprintf (dump_file, "\n");
356 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
358 node = bsi_stmt (si);
359 if (node && EXPR_P (node) && EXPR_LOCUS (node))
363 if (node && EXPR_P (node) && EXPR_LOCUS (node)
364 && EXPR_FILENAME (node) && EXPR_LINENO (node))
366 fprintf (dump_file, "\nloop at %s:%d: ",
367 EXPR_FILENAME (node), EXPR_LINENO (node));
375 /* Function debug_loop_details.
377 For vectorization debug dumps. */
380 vect_debug_details (struct loop *loop)
383 block_stmt_iterator si;
384 tree node = NULL_TREE;
386 if (!dump_file || !(dump_flags & TDF_DETAILS))
391 fprintf (dump_file, "\n");
400 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
402 node = bsi_stmt (si);
403 if (node && EXPR_P (node) && EXPR_LOCUS (node))
407 if (node && EXPR_P (node) && EXPR_LOCUS (node)
408 && EXPR_FILENAME (node) && EXPR_LINENO (node))
410 fprintf (dump_file, "\nloop at %s:%d: ",
411 EXPR_FILENAME (node), EXPR_LINENO (node));
419 /* Function vect_get_ptr_offset
421 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
424 vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
425 tree vectype ATTRIBUTE_UNUSED,
426 tree *offset ATTRIBUTE_UNUSED)
428 /* TODO: Use alignment information. */
433 /* Function vect_get_base_and_bit_offset
435 Return the BASE of the data reference EXPR.
436 If VECTYPE is given, also compute the OFFSET from BASE in bits.
437 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
438 bits of 'a.b[i] + 4B' from a.
441 EXPR - the memory reference that is being analyzed
442 DR - the data_reference struct of the _original_ memory reference
443 (Note: DR_REF (DR) is not necessarily EXPR)
444 VECTYPE - the type that defines the alignment (i.e, we compute
445 alignment relative to TYPE_ALIGN(VECTYPE))
448 BASE (returned value) - the base of the data reference EXPR.
449 E.g, if EXPR is a.b[k].c[i][j] the returned
451 OFFSET - offset of EXPR from BASE in bits
452 BASE_ALIGNED_P - indicates if BASE is aligned
454 If something unexpected is encountered (an unsupported form of data-ref),
455 or if VECTYPE is given but OFFSET cannot be determined:
456 then NULL_TREE is returned. */
459 vect_get_base_and_bit_offset (struct data_reference *dr,
462 loop_vec_info loop_vinfo,
464 bool *base_aligned_p)
466 tree this_offset = size_zero_node;
467 tree base = NULL_TREE;
470 struct data_reference *array_dr;
471 enum tree_code code = TREE_CODE (expr);
473 *base_aligned_p = false;
477 /* These cases end the recursion: */
479 *offset = size_zero_node;
480 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
481 *base_aligned_p = true;
488 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
491 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
493 base = vect_get_ptr_offset (expr, vectype, offset);
495 *base_aligned_p = true;
499 *base_aligned_p = true;
500 *offset = size_zero_node;
506 *offset = int_const_binop (MULT_EXPR, expr,
507 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
510 /* These cases continue the recursion: */
512 oprnd0 = TREE_OPERAND (expr, 0);
513 oprnd1 = TREE_OPERAND (expr, 1);
515 this_offset = bit_position (oprnd1);
516 if (vectype && !host_integerp (this_offset, 1))
522 oprnd0 = TREE_OPERAND (expr, 0);
527 oprnd0 = TREE_OPERAND (expr, 0);
532 if (DR_REF (dr) != expr)
533 /* Build array data_reference struct if the existing DR_REF
534 doesn't match EXPR. This happens, for example, when the
535 EXPR is *T and T is initialized to &arr[indx]. The DR struct
536 contains information on the access of T, not of arr. In order
537 to continue the analysis, we create a new DR struct that
538 describes the access of arr.
540 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
544 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
545 vectype, &this_offset);
550 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
552 *offset = this_offset;
553 *base_aligned_p = true;
560 /* In case we have a PLUS_EXPR of the form
561 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
562 This is verified in vect_get_symbl_and_dr. */
563 oprnd0 = TREE_OPERAND (expr, 0);
564 oprnd1 = TREE_OPERAND (expr, 1);
566 base = vect_get_base_and_bit_offset
567 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
568 if (vectype && !base)
578 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
579 loop_vinfo, offset, base_aligned_p);
583 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
584 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
587 if (vect_debug_details (NULL))
589 print_generic_expr (dump_file, expr, TDF_SLIM);
590 fprintf (dump_file, " --> total offset for ref: ");
591 print_generic_expr (dump_file, *offset, TDF_SLIM);
599 /* Function vect_force_dr_alignment_p.
601 Returns whether the alignment of a DECL can be forced to be aligned
602 on ALIGNMENT bit boundary. */
605 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
607 if (TREE_CODE (decl) != VAR_DECL)
610 if (DECL_EXTERNAL (decl))
613 if (TREE_STATIC (decl))
614 return (alignment <= MAX_OFILE_ALIGNMENT);
616 /* This is not 100% correct. The absolute correct stack alignment
617 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
618 PREFERRED_STACK_BOUNDARY is honored by all translation units.
619 However, until someone implements forced stack alignment, SSE
620 isn't really usable without this. */
621 return (alignment <= PREFERRED_STACK_BOUNDARY);
625 /* Function vect_get_new_vect_var.
627 Returns a name for a new variable. The current naming scheme appends the
628 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
629 the name of vectorizer generated variables, and appends that to NAME if
633 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
639 if (var_kind == vect_simple_var)
644 prefix_len = strlen (prefix);
647 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
649 new_vect_var = create_tmp_var (type, prefix);
655 /* Function vect_create_index_for_vector_ref.
657 Create (and return) an index variable, along with it's update chain in the
658 loop. This variable will be used to access a memory location in a vector
662 LOOP: The loop being vectorized.
663 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
664 function can be added here, or in the loop pre-header.
667 Return an index that will be used to index a vector array. It is expected
668 that a pointer to the first vector will be used as the base address for the
671 FORNOW: we are not trying to be efficient, just creating a new index each
672 time from scratch. At this time all vector references could use the same
675 TODO: create only one index to be used by all vector references. Record
676 the index in the LOOP_VINFO the first time this procedure is called and
677 return it on subsequent calls. The increment of this index must be placed
678 just before the conditional expression that ends the single block loop. */
681 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
684 tree indx_before_incr, indx_after_incr;
686 /* It is assumed that the base pointer used for vectorized access contains
687 the address of the first vector. Therefore the index used for vectorized
688 access must be initialized to zero and incremented by 1. */
690 init = integer_zero_node;
691 step = integer_one_node;
693 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
694 create_iv (init, step, NULL_TREE, loop, bsi, false,
695 &indx_before_incr, &indx_after_incr);
697 return indx_before_incr;
701 /* Function vect_create_addr_base_for_vector_ref.
703 Create an expression that computes the address of the first memory location
704 that will be accessed for a data reference.
707 STMT: The statement containing the data reference.
708 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
709 OFFSET: Optional. If supplied, it is be added to the initial address.
712 1. Return an SSA_NAME whose value is the address of the memory location of the
713 first vector of the data reference.
714 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
715 these statement(s) which define the returned SSA_NAME.
717 FORNOW: We are only handling array accesses with step 1. */
720 vect_create_addr_base_for_vector_ref (tree stmt,
724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
725 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
726 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
727 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
728 tree base_name = unshare_expr (DR_BASE_NAME (dr));
729 tree ref = DR_REF (dr);
730 tree data_ref_base_type = TREE_TYPE (data_ref_base);
731 tree scalar_type = TREE_TYPE (ref);
732 tree scalar_ptr_type = build_pointer_type (scalar_type);
734 tree init_val, step, init_oval;
736 bool is_ptr_ref, is_array_ref, is_addr_expr;
741 tree addr_base, addr_expr;
744 /* Only the access function of the last index is relevant (i_n in
745 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
746 access_fn = DR_ACCESS_FN (dr, 0);
747 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
749 init_oval = integer_zero_node;
751 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
752 && TREE_CODE (data_ref_base) == SSA_NAME;
753 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE
754 && (TREE_CODE (data_ref_base) == VAR_DECL
755 || TREE_CODE (data_ref_base) == COMPONENT_REF
756 || TREE_CODE (data_ref_base) == ARRAY_REF);
757 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
758 || TREE_CODE (data_ref_base) == PLUS_EXPR
759 || TREE_CODE (data_ref_base) == MINUS_EXPR;
760 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
762 /** Create: &(base[init_val])
764 if data_ref_base is an ARRAY_TYPE:
767 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
768 base = *((scalar_array *) data_ref_base)
772 array_base = data_ref_base;
773 else /* is_ptr_ref or is_addr_expr */
775 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
776 tree scalar_array_type = build_array_type (scalar_type, 0);
777 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
778 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
779 add_referenced_tmp_var (array_ptr);
781 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
782 add_referenced_tmp_var (dest);
784 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
785 append_to_statement_list_force (new_stmt, new_stmt_list);
787 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
788 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
789 new_temp = make_ssa_name (array_ptr, vec_stmt);
790 TREE_OPERAND (vec_stmt, 0) = new_temp;
791 append_to_statement_list_force (vec_stmt, new_stmt_list);
794 array_base = build_fold_indirect_ref (new_temp);
797 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
798 add_referenced_tmp_var (dest);
799 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
800 append_to_statement_list_force (new_stmt, new_stmt_list);
804 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
805 add_referenced_tmp_var (tmp);
806 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
807 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
808 init_val = make_ssa_name (tmp, vec_stmt);
809 TREE_OPERAND (vec_stmt, 0) = init_val;
810 append_to_statement_list_force (vec_stmt, new_stmt_list);
813 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
814 NULL_TREE, NULL_TREE);
815 addr_base = build_fold_addr_expr (array_ref);
817 /* addr_expr = addr_base */
818 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
819 get_name (base_name));
820 add_referenced_tmp_var (addr_expr);
821 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
822 new_temp = make_ssa_name (addr_expr, vec_stmt);
823 TREE_OPERAND (vec_stmt, 0) = new_temp;
824 append_to_statement_list_force (vec_stmt, new_stmt_list);
830 /* Function get_vectype_for_scalar_type.
832 Returns the vector type corresponding to SCALAR_TYPE as supported
836 get_vectype_for_scalar_type (tree scalar_type)
838 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
839 int nbytes = GET_MODE_SIZE (inner_mode);
846 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
848 nunits = UNITS_PER_SIMD_WORD / nbytes;
850 vectype = build_vector_type (scalar_type, nunits);
851 if (TYPE_MODE (vectype) == BLKmode)
857 /* Function vect_align_data_ref.
859 Handle mislignment of a memory accesses.
861 FORNOW: Can't handle misaligned accesses.
862 Make sure that the dataref is aligned. */
865 vect_align_data_ref (tree stmt)
867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
868 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
870 /* FORNOW: can't handle misaligned accesses;
871 all accesses expected to be aligned. */
872 gcc_assert (aligned_access_p (dr));
876 /* Function vect_create_data_ref_ptr.
878 Create a memory reference expression for vector access, to be used in a
879 vector load/store stmt. The reference is based on a new pointer to vector
883 1. STMT: a stmt that references memory. Expected to be of the form
884 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
885 2. BSI: block_stmt_iterator where new stmts can be added.
886 3. OFFSET (optional): an offset to be added to the initial address accessed
887 by the data-ref in STMT.
888 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
889 pointing to the initial address.
892 1. Declare a new ptr to vector_type, and have it point to the base of the
893 data reference (initial addressed accessed by the data reference).
894 For example, for vector of type V8HI, the following code is generated:
897 vp = (v8hi *)initial_address;
899 if OFFSET is not supplied:
900 initial_address = &a[init];
901 if OFFSET is supplied:
902 initial_address = &a[init + OFFSET];
904 Return the initial_address in INITIAL_ADDRESS.
906 2. Create a data-reference in the loop based on the new vector pointer vp,
907 and using a new index variable 'idx' as follows:
911 where if ONLY_INIT is true:
914 update = idx + vector_type_size
916 Return the pointer vp'.
919 FORNOW: handle only aligned and consecutive accesses. */
922 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
923 tree *initial_address, bool only_init)
926 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
927 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
928 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
929 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
933 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
934 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
935 vuse_optype vuses = STMT_VUSE_OPS (stmt);
936 int nvuses, nv_may_defs, nv_must_defs;
940 tree new_stmt_list = NULL_TREE;
942 edge pe = loop_preheader_edge (loop);
949 base_name = unshare_expr (DR_BASE_NAME (dr));
950 if (vect_debug_details (NULL))
952 tree data_ref_base = base_name;
953 fprintf (dump_file, "create array_ref of type: ");
954 print_generic_expr (dump_file, vectype, TDF_SLIM);
955 if (TREE_CODE (data_ref_base) == VAR_DECL)
956 fprintf (dump_file, "vectorizing a one dimensional array ref: ");
957 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
958 fprintf (dump_file, "vectorizing a multidimensional array ref: ");
959 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
960 fprintf (dump_file, "vectorizing a record based array ref: ");
961 else if (TREE_CODE (data_ref_base) == SSA_NAME)
962 fprintf (dump_file, "vectorizing a pointer ref: ");
963 print_generic_expr (dump_file, base_name, TDF_SLIM);
966 /** (1) Create the new vector-pointer variable: **/
968 vect_ptr_type = build_pointer_type (vectype);
969 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
970 get_name (base_name));
971 add_referenced_tmp_var (vect_ptr);
974 /** (2) Handle aliasing information of the new vector-pointer: **/
976 tag = STMT_VINFO_MEMTAG (stmt_info);
978 get_var_ann (vect_ptr)->type_mem_tag = tag;
980 /* Mark for renaming all aliased variables
981 (i.e, the may-aliases of the type-mem-tag). */
982 nvuses = NUM_VUSES (vuses);
983 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
984 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
985 for (i = 0; i < nvuses; i++)
987 tree use = VUSE_OP (vuses, i);
988 if (TREE_CODE (use) == SSA_NAME)
989 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
991 for (i = 0; i < nv_may_defs; i++)
993 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
994 if (TREE_CODE (def) == SSA_NAME)
995 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
997 for (i = 0; i < nv_must_defs; i++)
999 tree def = V_MUST_DEF_OP (v_must_defs, i);
1000 if (TREE_CODE (def) == SSA_NAME)
1001 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
1005 /** (3) Calculate the initial address the vector-pointer, and set
1006 the vector-pointer to point to it before the loop: **/
1008 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1009 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1011 pe = loop_preheader_edge (loop);
1012 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1013 gcc_assert (!new_bb);
1014 *initial_address = new_temp;
1016 /* Create: p = (vectype *) initial_base */
1017 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1018 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1019 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1020 TREE_OPERAND (vec_stmt, 0) = new_temp;
1021 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1022 gcc_assert (!new_bb);
1023 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
1026 /** (4) Handle the updating of the vector-pointer inside the loop: **/
1028 if (only_init) /* No update in loop is required. */
1029 return vect_ptr_init;
1031 idx = vect_create_index_for_vector_ref (loop, bsi);
1033 /* Create: update = idx * vectype_size */
1034 ptr_update = create_tmp_var (integer_type_node, "update");
1035 add_referenced_tmp_var (ptr_update);
1036 vectype_size = build_int_cst (integer_type_node,
1037 GET_MODE_SIZE (TYPE_MODE (vectype)));
1038 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
1039 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
1040 new_temp = make_ssa_name (ptr_update, vec_stmt);
1041 TREE_OPERAND (vec_stmt, 0) = new_temp;
1042 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1044 /* Create: data_ref_ptr = vect_ptr_init + update */
1045 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
1046 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
1047 new_temp = make_ssa_name (vect_ptr, vec_stmt);
1048 TREE_OPERAND (vec_stmt, 0) = new_temp;
1049 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1050 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
1052 return data_ref_ptr;
1056 /* Function vect_create_destination_var.
1058 Create a new temporary of type VECTYPE. */
1061 vect_create_destination_var (tree scalar_dest, tree vectype)
1064 const char *new_name;
1066 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1068 new_name = get_name (scalar_dest);
1071 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1072 add_referenced_tmp_var (vec_dest);
1078 /* Function vect_init_vector.
1080 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1081 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1082 used in the vectorization of STMT. */
1085 vect_init_vector (tree stmt, tree vector_var)
1087 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1088 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1097 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1098 add_referenced_tmp_var (new_var);
1100 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1101 new_temp = make_ssa_name (new_var, init_stmt);
1102 TREE_OPERAND (init_stmt, 0) = new_temp;
1104 pe = loop_preheader_edge (loop);
1105 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1106 gcc_assert (!new_bb);
1108 if (vect_debug_details (NULL))
1110 fprintf (dump_file, "created new init_stmt: ");
1111 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1114 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1119 /* Function vect_get_vec_def_for_operand.
1121 OP is an operand in STMT. This function returns a (vector) def that will be
1122 used in the vectorized stmt for STMT.
1124 In the case that OP is an SSA_NAME which is defined in the loop, then
1125 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1127 In case OP is an invariant or constant, a new stmt that creates a vector def
1128 needs to be introduced. */
/* NOTE(review): elided listing — declarations, braces and case labels of the
   switch below are partially missing.  */
1131 vect_get_vec_def_for_operand (tree op, tree stmt)
1136 stmt_vec_info def_stmt_info = NULL;
1137 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1138 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1139 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1140 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1147 if (vect_debug_details (NULL))
1149 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
1150 print_generic_expr (dump_file, op, TDF_SLIM);
1153 /** ===> Case 1: operand is a constant. **/
1155 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
1157 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
/* Shadowed re-fetch of vinfo/vectype/nunits inside this arm (redundant
   with the outer copies, but harmless).  */
1160 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1161 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1162 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1166 /* Build a tree with vector elements. */
1167 if (vect_debug_details (NULL))
1168 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
/* Replicate the scalar constant NUNITS times into a TREE_LIST.  */
1170 for (i = nunits - 1; i >= 0; --i)
1172 t = tree_cons (NULL_TREE, op, t);
1174 vec_cst = build_vector (vectype, t);
1175 return vect_init_vector (stmt, vec_cst);
1178 gcc_assert (TREE_CODE (op) == SSA_NAME);
1180 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
1182 def_stmt = SSA_NAME_DEF_STMT (op);
1183 def_stmt_info = vinfo_for_stmt (def_stmt);
1185 if (vect_debug_details (NULL))
1187 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
1188 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1192 /** ==> Case 2.1: operand is defined inside the loop. **/
1196 /* Get the def from the vectorized stmt. */
1198 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1199 gcc_assert (vec_stmt);
1200 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
1205 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
1206 it is a reduction/induction. **/
1208 bb = bb_for_stmt (def_stmt);
1209 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1211 if (vect_debug_details (NULL))
1212 fprintf (dump_file, "reduction/induction - unsupported.");
1213 internal_error ("no support for reduction/induction"); /* FORNOW */
1217 /** ==> Case 2.3: operand is defined outside the loop -
1218 it is a loop invariant. */
/* Switch case labels (PHI_NODE / MODIFY_EXPR / NOP_EXPR presumably) are
   elided from this listing — TODO confirm against the full file.  */
1220 switch (TREE_CODE (def_stmt))
1223 def = PHI_RESULT (def_stmt);
1226 def = TREE_OPERAND (def_stmt, 0);
1229 def = TREE_OPERAND (def_stmt, 0);
1230 gcc_assert (IS_EMPTY_STMT (def_stmt));
1234 if (vect_debug_details (NULL))
1236 fprintf (dump_file, "unsupported defining stmt: ");
1237 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1239 internal_error ("unsupported defining stmt");
1242 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
1244 if (vect_debug_details (NULL))
1245 fprintf (dump_file, "Create vector_inv.");
1247 for (i = nunits - 1; i >= 0; --i)
1249 t = tree_cons (NULL_TREE, def, t);
/* Invariants use a CONSTRUCTOR (runtime values) rather than a VECTOR_CST.  */
1252 vec_inv = build_constructor (vectype, t);
1253 return vect_init_vector (stmt, vec_inv);
1257 /* Function vect_finish_stmt_generation.
1259 Insert a new stmt. */
1262 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
/* Insert VEC_STMT immediately before the scalar stmt being vectorized,
   leaving BSI pointing at the same stmt.  */
1264 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1266 if (vect_debug_details (NULL))
1268 fprintf (dump_file, "add new stmt: ");
1269 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1272 /* Make sure bsi points to the stmt that is being vectorized. */
1274 /* Assumption: any stmts created for the vectorization of stmt S were
1275 inserted before S. BSI is expected to point to S or some new stmt before S. */
/* Walk forward until BSI lands on STMT; the assert guarantees it was found.  */
1277 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
1279 gcc_assert (stmt == bsi_stmt (*bsi));
1283 /* Function vectorizable_assignment.
1285 Check if STMT performs an assignment (copy) that can be vectorized.
1286 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1287 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1288 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Dual-mode routine: analysis when VEC_STMT == NULL, transformation
   otherwise.  Elided listing — return stmts are not all visible.  */
1291 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1298 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1299 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1302 /* Is vectorizable assignment? */
1304 if (TREE_CODE (stmt) != MODIFY_EXPR)
1307 scalar_dest = TREE_OPERAND (stmt, 0);
1308 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* RHS must be a "simple use": constant, invariant, or loop-defined SSA name.  */
1311 op = TREE_OPERAND (stmt, 1);
1312 if (!vect_is_simple_use (op, loop, NULL))
1314 if (vect_debug_details (NULL))
1315 fprintf (dump_file, "use not simple.");
1319 if (!vec_stmt) /* transformation not required. */
1321 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1326 if (vect_debug_details (NULL))
1327 fprintf (dump_file, "transform assignment.");
/* Handle def.  */
1330 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle use.  */
1333 op = TREE_OPERAND (stmt, 1);
1334 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1336 /* Arguments are ready. create the new vector stmt. */
1337 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1338 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1339 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1340 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1346 /* Function vectorizable_operation.
1348 Check if STMT performs a binary or unary operation that can be vectorized.
1349 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1350 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1351 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Dual-mode (analysis / transform), same protocol as vectorizable_assignment.
   Elided listing — some declarations and returns are missing here.  */
1354 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1359 tree op0, op1 = NULL;
1360 tree vec_oprnd0, vec_oprnd1=NULL;
1361 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1362 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1363 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1365 enum tree_code code;
1366 enum machine_mode vec_mode;
1372 /* Is STMT a vectorizable binary/unary operation? */
1373 if (TREE_CODE (stmt) != MODIFY_EXPR)
1376 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Map the scalar tree code to the optab that implements it on vectors.  */
1379 operation = TREE_OPERAND (stmt, 1);
1380 code = TREE_CODE (operation);
1381 optab = optab_for_tree_code (code, vectype);
1383 /* Support only unary or binary operations. */
1384 op_type = TREE_CODE_LENGTH (code);
1385 if (op_type != unary_op && op_type != binary_op)
1387 if (vect_debug_details (NULL))
1388 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
/* All operands must be simple uses.  */
1392 for (i = 0; i < op_type; i++)
1394 op = TREE_OPERAND (operation, i);
1395 if (!vect_is_simple_use (op, loop, NULL))
1397 if (vect_debug_details (NULL))
1398 fprintf (dump_file, "use not simple.");
1403 /* Supportable by target? */
1406 if (vect_debug_details (NULL))
1407 fprintf (dump_file, "no optab.");
/* Check the optab has an insn for this vector mode.  */
1410 vec_mode = TYPE_MODE (vectype);
1411 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1413 if (vect_debug_details (NULL))
1414 fprintf (dump_file, "op not supported by target.");
1418 if (!vec_stmt) /* transformation not required. */
1420 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1426 if (vect_debug_details (NULL))
1427 fprintf (dump_file, "transform binary/unary operation.");
/* Handle def.  */
1430 scalar_dest = TREE_OPERAND (stmt, 0);
1431 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle uses.  */
1434 op0 = TREE_OPERAND (operation, 0);
1435 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
1437 if (op_type == binary_op)
1439 op1 = TREE_OPERAND (operation, 1);
1440 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
1443 /* Arguments are ready. create the new vector stmt. */
1445 if (op_type == binary_op)
1446 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1447 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1449 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1450 build1 (code, vectype, vec_oprnd0));
1451 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1452 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1453 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1459 /* Function vectorizable_store.
1461 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1463 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1464 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1465 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Dual-mode (analysis / transform).  Elided listing — some declarations and
   returns are not visible.  */
1468 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1474 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1475 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1476 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1477 enum machine_mode vec_mode;
1480 /* Is vectorizable store? */
1482 if (TREE_CODE (stmt) != MODIFY_EXPR)
/* LHS must be a memory reference (array element or indirection).  */
1485 scalar_dest = TREE_OPERAND (stmt, 0);
1486 if (TREE_CODE (scalar_dest) != ARRAY_REF
1487 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1490 op = TREE_OPERAND (stmt, 1);
1491 if (!vect_is_simple_use (op, loop, NULL))
1493 if (vect_debug_details (NULL))
1494 fprintf (dump_file, "use not simple.");
/* Target must support a vector move in this mode.  */
1498 vec_mode = TYPE_MODE (vectype);
1499 /* FORNOW. In some cases can vectorize even if data-type not supported
1500 (e.g. - array initialization with 0). */
1501 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1504 if (!STMT_VINFO_DATA_REF (stmt_info))
/* FORNOW: only aligned stores are supported.  */
1507 if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
1510 if (!vec_stmt) /* transformation not required. */
1512 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1518 if (vect_debug_details (NULL))
1519 fprintf (dump_file, "transform store");
1521 /* Handle use - get the vectorized def from the defining stmt. */
1522 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
/* Handle def: build a vector pointer and dereference it.  */
1525 /* FORNOW: make sure the data reference is aligned. */
1526 vect_align_data_ref (stmt);
1527 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1528 data_ref = build_fold_indirect_ref (data_ref);
1530 /* Arguments are ready. create the new vector stmt. */
1531 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1532 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1538 /* vectorizable_load.
1540 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1542 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1543 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1544 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Dual-mode (analysis / transform).  Two code-gen strategies: a plain
   (possibly misaligned) load, or "software pipelined" realignment via
   two aligned loads + REALIGN_LOAD.  Elided listing — several lines missing.  */
1547 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1550 tree vec_dest = NULL;
1551 tree data_ref = NULL;
1553 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1554 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1555 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1562 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1563 edge pe = loop_preheader_edge (loop);
1564 bool software_pipeline_loads_p = false;
1566 /* Is vectorizable load? */
1568 if (TREE_CODE (stmt) != MODIFY_EXPR)
1571 scalar_dest = TREE_OPERAND (stmt, 0);
1572 if (TREE_CODE (scalar_dest) != SSA_NAME)
1575 op = TREE_OPERAND (stmt, 1);
1576 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1579 if (!STMT_VINFO_DATA_REF (stmt_info))
1582 mode = (int) TYPE_MODE (vectype);
1584 /* FORNOW. In some cases can vectorize even if data-type not supported
1585 (e.g. - data copies). */
1586 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1588 if (vect_debug_details (loop))
1589 fprintf (dump_file, "Aligned load, but unsupported type.");
/* Misaligned case: prefer realign-load if the target provides it (and the
   mask builtin, when one is declared); else require misaligned loads OK.  */
1593 if (!aligned_access_p (dr))
1595 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
1596 && (!targetm.vectorize.builtin_mask_for_load
1597 || targetm.vectorize.builtin_mask_for_load ()))
1598 software_pipeline_loads_p = true;
1599 else if (!targetm.vectorize.misaligned_mem_ok (mode))
1601 /* Possibly unaligned access, and can't sofware pipeline the loads */
1602 if (vect_debug_details (loop))
1603 fprintf (dump_file, "Arbitrary load not supported.");
1608 if (!vec_stmt) /* transformation not required. */
1610 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1616 if (vect_debug_details (NULL))
1617 fprintf (dump_file, "transform load.");
1619 if (!software_pipeline_loads_p)
/* Simple case: one vector load, aligned or target-tolerated misaligned.  */
1630 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1631 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1632 if (aligned_access_p (dr))
1633 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned: wrap in MISALIGNED_INDIRECT_REF carrying the bit offset
   (misalignment in bytes * BITS_PER_UNIT; -1 means unknown).  */
1636 int mis = DR_MISALIGNMENT (dr);
1637 tree tmis = (mis == -1 ?
1639 build_int_cst (integer_type_node, mis));
1640 tmis = int_const_binop (MULT_EXPR, tmis,
1641 build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
1642 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1644 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1645 new_temp = make_ssa_name (vec_dest, new_stmt);
1646 TREE_OPERAND (new_stmt, 0) = new_temp;
1647 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1649 else /* software-pipeline the loads */
/* Scheme (from the original comment): load the two aligned vectors
   straddling the misaligned address and combine with realign_load.  */
1653 msq_init = *(floor(p1))
1654 p2 = initial_addr + VS - 1;
1655 magic = have_builtin ? builtin_result : initial_address;
1658 p2' = p2 + indx * vectype_size
1660 vec_dest = realign_load (msq, lsq, magic)
1674 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1675 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1676 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1678 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1679 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1680 new_temp = make_ssa_name (vec_dest, new_stmt);
1681 TREE_OPERAND (new_stmt, 0) = new_temp;
1682 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1683 gcc_assert (!new_bb);
1684 msq_init = TREE_OPERAND (new_stmt, 0);
1687 /* <2> Create lsq = *(floor(p2')) in the loop */
/* Offset = nunits - 1 elements past the first pointer.  */
1688 offset = build_int_cst (integer_type_node,
1689 GET_MODE_NUNITS (TYPE_MODE (vectype)));
1690 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1691 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1692 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1693 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1694 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1695 new_temp = make_ssa_name (vec_dest, new_stmt);
1696 TREE_OPERAND (new_stmt, 0) = new_temp;
1697 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1698 lsq = TREE_OPERAND (new_stmt, 0);
/* <3> Compute the permutation "magic" operand.  */
1702 if (targetm.vectorize.builtin_mask_for_load)
1704 /* Create permutation mask, if required, in loop preheader. */
1706 params = build_tree_list (NULL_TREE, init_addr);
1707 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1708 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1709 new_stmt = build_function_call_expr (builtin_decl, params);
1710 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1711 new_temp = make_ssa_name (vec_dest, new_stmt);
1712 TREE_OPERAND (new_stmt, 0) = new_temp;
1713 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1714 gcc_assert (!new_bb);
1715 magic = TREE_OPERAND (new_stmt, 0);
1719 /* Use current address instead of init_addr for reduced reg pressure. */
1720 magic = dataref_ptr;
1724 /* <4> Create msq = phi <msq_init, lsq> in loop */
1725 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1726 msq = make_ssa_name (vec_dest, NULL_TREE);
1727 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1728 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1729 add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
1730 add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));
1733 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1734 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1735 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1736 new_stmt = build2 (MODIFY_EXPR, vectype, new_stmt);
1737 new_temp = make_ssa_name (vec_dest, new_stmt);
1738 TREE_OPERAND (new_stmt, 0) = new_temp;
1739 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1742 *vec_stmt = new_stmt;
1747 /* Function vect_transform_stmt.
1749 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatches on the vec_info_type recorded during analysis; returns whether
   STMT is a store (the visible code sets is_store but the return and the
   store-case assignment are elided here — TODO confirm).  */
1752 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1754 bool is_store = false;
1755 tree vec_stmt = NULL_TREE;
1756 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1759 switch (STMT_VINFO_TYPE (stmt_info))
1761 case op_vec_info_type:
1762 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1766 case assignment_vec_info_type:
1767 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1771 case load_vec_info_type:
1772 done = vectorizable_load (stmt, bsi, &vec_stmt);
1776 case store_vec_info_type:
1777 done = vectorizable_store (stmt, bsi, &vec_stmt);
1782 if (vect_debug_details (NULL))
1783 fprintf (dump_file, "stmt not supported.");
/* Remember the vectorized stmt so later uses of STMT's def can find it.  */
1787 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1793 /* Function vect_transform_loop_bound.
1795 Create a new exit condition for the loop. */
/* Replaces the scalar exit test with a fresh IV counting vector iterations
   (0 .. old_N/vf).  Elided listing — some declarations are missing.  */
1798 vect_transform_loop_bound (loop_vec_info loop_vinfo)
1800 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1801 edge exit_edge = loop->single_exit;
1802 block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
1803 tree indx_before_incr, indx_after_incr;
1804 tree orig_cond_expr;
1805 HOST_WIDE_INT old_N = 0;
1808 tree new_loop_bound;
/* The analysis phase guaranteed a known iteration count...  */
1812 gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
1813 old_N = LOOP_VINFO_NITERS (loop_vinfo);
1814 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1817 assuming number-of-iterations divides by the vectorization factor. */
1818 gcc_assert (!(old_N % vf));
1820 orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
1821 gcc_assert (orig_cond_expr);
1822 gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));
/* New IV: starts at 0, steps by 1 per (vector) iteration.  */
1824 create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
1825 &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);
1827 /* bsi_insert is using BSI_NEW_STMT. We need to bump it back
1828 to point to the exit condition. */
1829 bsi_next (&loop_exit_bsi);
1830 gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);
1832 /* new loop exit test: */
1833 lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
1834 new_loop_bound = build_int_cst (lb_type, old_N/vf);
/* Orient the comparison to match which edge leaves the loop.  */
1836 if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop. */
1837 cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
1838 else /* 'then' edge loops back. */
1839 cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
/* Reuse the original COND_EXPR's arms so CFG edges stay valid.  */
1841 cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
1842 TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));
1844 bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);
1846 /* remove old loop exit test: */
1847 bsi_remove (&loop_exit_bsi);
1849 if (vect_debug_details (NULL))
1850 print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
1854 /* Function vect_transform_loop.
1856 The analysis phase has determined that the loop is vectorizable.
1857 Vectorize the loop - created vectorized stmts to replace the scalar
1858 stmts in the loop, and update the loop exit condition. */
/* Driver of the transformation phase.  Elided listing — loop increments,
   some declarations and brace structure are not visible.  */
1861 vect_transform_loop (loop_vec_info loop_vinfo,
1862 struct loops *loops ATTRIBUTE_UNUSED)
1864 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1865 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1866 int nbbs = loop->num_nodes;
1867 block_stmt_iterator si;
1869 #ifdef ENABLE_CHECKING
1870 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1873 if (vect_debug_details (NULL))
1874 fprintf (dump_file, "\n<<vec_transform_loop>>\n");
1876 /* 1) Make sure the loop header has exactly two entries
1877 2) Make sure we have a preheader basic block. */
/* Asserts: header has a second pred but not a third (exactly two).  */
1879 gcc_assert (loop->header->pred->pred_next);
1880 gcc_assert (!loop->header->pred->pred_next->pred_next);
1882 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1885 /* FORNOW: the vectorizer supports only loops which body consist
1886 of one basic block (header + empty latch). When the vectorizer will
1887 support more involved loop forms, the order by which the BBs are
1888 traversed need to be reconsidered. */
1890 for (i = 0; i < nbbs; i++)
1892 basic_block bb = bbs[i];
/* Note: SI is advanced inside the body (stmt removal for stores),
   not in the for-header.  */
1894 for (si = bsi_start (bb); !bsi_end_p (si);)
1896 tree stmt = bsi_stmt (si);
1897 stmt_vec_info stmt_info;
1899 #ifdef ENABLE_CHECKING
1903 if (vect_debug_details (NULL))
1905 fprintf (dump_file, "------>vectorizing statement: ");
1906 print_generic_expr (dump_file, stmt, TDF_SLIM);
1908 stmt_info = vinfo_for_stmt (stmt);
1909 gcc_assert (stmt_info);
1910 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1915 #ifdef ENABLE_CHECKING
1916 /* FORNOW: Verify that all stmts operate on the same number of
1917 units and no inner unrolling is necessary. */
1918 vectype = STMT_VINFO_VECTYPE (stmt_info);
1919 gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype))
1920 == vectorization_factor);
1922 /* -------- vectorize statement ------------ */
1923 if (vect_debug_details (NULL))
1924 fprintf (dump_file, "transform statement.");
1926 is_store = vect_transform_stmt (stmt, &si);
1929 /* free the attached stmt_vec_info and remove the stmt. */
1930 stmt_ann_t ann = stmt_ann (stmt);
1932 set_stmt_info (ann, NULL);
/* Finally rewrite the exit test for the reduced trip count.  */
1941 vect_transform_loop_bound (loop_vinfo);
1943 if (vect_debug_details (loop))
1944 fprintf (dump_file,"Success! loop vectorized.");
1945 if (vect_debug_stats (loop))
1946 fprintf (dump_file, "LOOP VECTORIZED.");
1950 /* Function vect_is_simple_use.
1953 LOOP - the loop that is being vectorized.
1954 OPERAND - operand of a stmt in LOOP.
1955 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1957 Returns whether a stmt with OPERAND can be vectorized.
1958 Supportable operands are constants, loop invariants, and operands that are
1959 defined by the current iteration of the loop. Unsupportable operands are
1960 those that are defined by a previous iteration of the loop (as is the case
1961 in reduction/induction computations). */
/* Elided listing — the true/false return stmts and DEF assignment are
   not all visible here.  */
1964 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
/* Constants are always simple.  */
1972 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1975 if (TREE_CODE (operand) != SSA_NAME)
1978 def_stmt = SSA_NAME_DEF_STMT (operand);
1979 if (def_stmt == NULL_TREE )
1981 if (vect_debug_details (NULL))
1982 fprintf (dump_file, "no def_stmt.");
1986 /* empty stmt is expected only in case of a function argument.
1987 (Otherwise - we expect a phi_node or a modify_expr). */
1988 if (IS_EMPTY_STMT (def_stmt))
1990 tree arg = TREE_OPERAND (def_stmt, 0);
1991 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1993 if (vect_debug_details (NULL))
1995 fprintf (dump_file, "Unexpected empty stmt: ");
1996 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2001 /* phi_node inside the loop indicates an induction/reduction pattern.
2002 This is not supported yet. */
2003 bb = bb_for_stmt (def_stmt);
2004 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2006 if (vect_debug_details (NULL))
2007 fprintf (dump_file, "reduction/induction - unsupported.");
2008 return false; /* FORNOW: not supported yet. */
2011 /* Expecting a modify_expr or a phi_node. */
2012 if (TREE_CODE (def_stmt) == MODIFY_EXPR
2013 || TREE_CODE (def_stmt) == PHI_NODE)
2024 /* Function vect_analyze_operations.
2026 Scan the loop stmts and make sure they are all vectorizable. */
/* Also determines the vectorization factor (nunits, uniform across all
   stmts FORNOW) and checks the loop bound divides by it.  Elided listing —
   some declarations, returns and braces are missing.  */
2029 vect_analyze_operations (loop_vec_info loop_vinfo)
2031 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2032 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2033 int nbbs = loop->num_nodes;
2034 block_stmt_iterator si;
2035 int vectorization_factor = 0;
2040 if (vect_debug_details (NULL))
2041 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
2043 for (i = 0; i < nbbs; i++)
2045 basic_block bb = bbs[i];
2047 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2049 tree stmt = bsi_stmt (si);
2051 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2054 if (vect_debug_details (NULL))
2056 fprintf (dump_file, "==> examining statement: ");
2057 print_generic_expr (dump_file, stmt, TDF_SLIM);
2060 gcc_assert (stmt_info);
2062 /* skip stmts which do not need to be vectorized.
2063 this is expected to include:
2064 - the COND_EXPR which is the loop exit condition
2065 - any LABEL_EXPRs in the loop
2066 - computations that are used only for array indexing or loop
2069 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2071 if (vect_debug_details (NULL))
2072 fprintf (dump_file, "irrelevant.");
/* A stmt already of vector type means the loop was hand-vectorized;
   bail out.  */
2076 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
2078 if (vect_debug_stats (loop) || vect_debug_details (loop))
2080 fprintf (dump_file, "not vectorized: vector stmt in loop:");
2081 print_generic_expr (dump_file, stmt, TDF_SLIM)
2086 if (STMT_VINFO_DATA_REF (stmt_info))
2087 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
2088 else if (TREE_CODE (stmt) == MODIFY_EXPR)
2089 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
2091 scalar_type = TREE_TYPE (stmt);
2093 if (vect_debug_details (NULL))
2095 fprintf (dump_file, "get vectype for scalar type: ");
2096 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2099 vectype = get_vectype_for_scalar_type (scalar_type);
2102 if (vect_debug_stats (loop) || vect_debug_details (loop))
2104 fprintf (dump_file, "not vectorized: unsupported data-type ");
2105 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2110 if (vect_debug_details (NULL))
2112 fprintf (dump_file, "vectype: ");
2113 print_generic_expr (dump_file, vectype, TDF_SLIM);
2115 STMT_VINFO_VECTYPE (stmt_info) = vectype;
/* Stmt is supportable if any of the four vectorizable_* analyses accepts
   it (NULL vec_stmt = analysis-only mode).  */
2117 ok = (vectorizable_operation (stmt, NULL, NULL)
2118 || vectorizable_assignment (stmt, NULL, NULL)
2119 || vectorizable_load (stmt, NULL, NULL)
2120 || vectorizable_store (stmt, NULL, NULL));
2124 if (vect_debug_stats (loop) || vect_debug_details (loop))
2126 fprintf (dump_file, "not vectorized: stmt not supported: ");
2127 print_generic_expr (dump_file, stmt, TDF_SLIM);
2132 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2133 if (vect_debug_details (NULL))
2134 fprintf (dump_file, "nunits = %d", nunits);
2136 if (vectorization_factor)
2138 /* FORNOW: don't allow mixed units.
2139 This restriction will be relaxed in the future. */
2140 if (nunits != vectorization_factor)
2142 if (vect_debug_stats (loop) || vect_debug_details (loop))
2143 fprintf (dump_file, "not vectorized: mixed data-types");
2148 vectorization_factor = nunits;
2152 /* TODO: Analyze cost. Decide if worth while to vectorize. */
2153 if (!vectorization_factor)
2155 if (vect_debug_stats (loop) || vect_debug_details (loop))
2156 fprintf (dump_file, "not vectorized: unsupported data-type")
2159 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
2161 /* FORNOW: handle only cases where the loop bound divides by the
2162 vectorization factor. */
2164 if (vect_debug_details (NULL))
2166 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
2167 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
2169 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2171 if (vect_debug_stats (loop) || vect_debug_details (loop))
2172 fprintf (dump_file, "not vectorized: Unknown loop bound.");
2176 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2177 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
2179 if (vect_debug_stats (loop) || vect_debug_details (loop))
2180 fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
2181 vectorization_factor);
2189 /* Function exist_non_indexing_operands_for_use_p
2191 USE is one of the uses attached to STMT. Check if USE is
2192 used in STMT for anything other than indexing an array. */
/* Elided listing — return stmts and the final comparison against USE are
   not visible here.  */
2195 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
2198 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2200 /* USE corresponds to some operand in STMT. If there is no data
2201 reference in STMT, then any operand that corresponds to USE
2202 is not indexing an array. */
2203 if (!STMT_VINFO_DATA_REF (stmt_info))
2206 /* STMT has a data_ref. FORNOW this means that its of one of
2207 the following forms:
2210 (This should have been verified in analyze_data_refs).
2212 'var' in the second case corresponds to a def, not a use,
2213 so USE cannot correspond to any operands that are not used
2216 Therefore, all we need to check is if STMT falls into the
2217 first case, and whether var corresponds to USE. */
/* LHS being an SSA_NAME distinguishes the load form "var = a[i]".  */
2219 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
2222 operand = TREE_OPERAND (stmt, 1);
2224 if (TREE_CODE (operand) != SSA_NAME)
2234 /* Function vect_is_simple_iv_evolution.
2236 FORNOW: A simple evolution of an induction variables in the loop is
2237 considered a polynomial evolution with constant step. */
/* Returns (via *INIT/*STEP) the initial condition and step; STRICT
   additionally requires step == 1.  Elided listing — some returns and the
   *INIT/*STEP stores are not visible.  */
2240 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2241 tree * step, bool strict)
2246 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2248 /* When there is no evolution in this loop, the evolution function
2250 if (evolution_part == NULL_TREE)
2253 /* When the evolution is a polynomial of degree >= 2
2254 the evolution function is not "simple". */
2255 if (tree_is_chrec (evolution_part))
2258 step_expr = evolution_part;
2259 init_expr = initial_condition (access_fn);
2261 if (vect_debug_details (NULL))
2263 fprintf (dump_file, "step: ");
2264 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2265 fprintf (dump_file, ", init: ");
2266 print_generic_expr (dump_file, init_expr, TDF_SLIM);
/* Step must be a compile-time integer constant.  */
2272 if (TREE_CODE (step_expr) != INTEGER_CST)
2274 if (vect_debug_details (NULL))
2275 fprintf (dump_file, "step unknown.");
/* Presumably the STRICT path: require step == 1 — TODO confirm, the
   enclosing condition is elided.  */
2280 if (!integer_onep (step_expr))
2282 if (vect_debug_details (NULL))
2283 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2291 /* Function vect_analyze_scalar_cycles.
2293 Examine the cross iteration def-use cycles of scalar variables, by
2294 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
2295 cycles that they represent do not impede vectorization.
2297 FORNOW: Reduction as in the following loop, is not supported yet:
2301 The cross-iteration cycle corresponding to variable 'sum' will be
2302 considered too complicated and will impede vectorization.
2304 FORNOW: Induction as in the following loop, is not supported yet:
2309 However, the following loop *is* vectorizable:
2314 In both loops there exists a def-use cycle for the variable i:
2315 loop: i_2 = PHI (i_0, i_1)
2320 The evolution of the above cycle is considered simple enough,
2321 however, we also check that the cycle does not need to be
2322 vectorized, i.e - we check that the variable that this cycle
2323 defines is only used for array indexing or in stmts that do not
2324 need to be vectorized. This is not the case in loop2, but it
2325 *is* the case in loop3. */
/* Elided listing — return stmts and some braces are not visible.  */
2328 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
2331 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2332 basic_block bb = loop->header;
2335 if (vect_debug_details (NULL))
2336 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
/* Walk all PHIs in the loop header.  */
2338 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
2340 tree access_fn = NULL;
2342 if (vect_debug_details (NULL))
2344 fprintf (dump_file, "Analyze phi: ");
2345 print_generic_expr (dump_file, phi, TDF_SLIM);
2348 /* Skip virtual phi's. The data dependences that are associated with
2349 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2351 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2353 if (vect_debug_details (NULL))
2354 fprintf (dump_file, "virtual phi. skip.");
2358 /* Analyze the evolution function. */
2360 /* FORNOW: The only scalar cross-iteration cycles that we allow are
2361 those of loop induction variables; This property is verified here.
2363 Furthermore, if that induction variable is used in an operation
2364 that needs to be vectorized (i.e, is not solely used to index
2365 arrays and check the exit condition) - we do not support its
2366 vectorization yet. This property is verified in vect_is_simple_use,
2367 during vect_analyze_operations. */
2369 access_fn = /* instantiate_parameters
2371 analyze_scalar_evolution (loop, PHI_RESULT (phi));
2375 if (vect_debug_stats (loop) || vect_debug_details (loop))
2376 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2380 if (vect_debug_details (NULL))
2382 fprintf (dump_file, "Access function of PHI: ");
2383 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The evolution must be a constant-step IV (strict mode presumably
   requested via the elided last argument — TODO confirm).  */
2386 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
2389 if (vect_debug_stats (loop) || vect_debug_details (loop))
2390 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2399 /* Function vect_analyze_data_ref_dependence.
2401 Return TRUE if there (might) exist a dependence between a memory-reference
2402 DRA and a memory-reference DRB.
 
     This is conservative: if the base names cannot be proved distinct, or
     if the affine dependence test cannot prove independence, a dependence
     is assumed and the loop will be rejected.  */
2405 vect_analyze_data_ref_dependence (struct data_reference *dra,
2406 struct data_reference *drb,
2410 struct data_dependence_relation *ddr;
/* If we cannot even determine whether the two refs have different bases,
   report an (assumed) dependence.  */
2412 if (!array_base_name_differ_p (dra, drb, &differ_p))
2414 if (vect_debug_stats (loop) || vect_debug_details (loop))
2417 "not vectorized: can't determine dependence between: ");
2418 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2419 fprintf (dump_file, " and ");
2420 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
/* Same base: run the affine dependence test.  chrec_known means the
   analysis proved there is no dependence.  */
2428 ddr = initialize_data_dependence_relation (dra, drb);
2429 compute_affine_dependence (ddr);
2431 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2434 if (vect_debug_stats (loop) || vect_debug_details (loop))
2437 "not vectorized: possible dependence between data-refs ");
2438 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2439 fprintf (dump_file, " and ");
2440 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2447 /* Function vect_analyze_data_ref_dependences.
2449 Examine all the data references in the loop, and make sure there do not
2450 exist any data dependences between them.
2452 TODO: dependences which distance is greater than the vectorization factor
 
     Checks every write/write pair and every read/write pair; read/read
     pairs need no checking since loads do not conflict.  */
2456 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2459 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2460 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2463 /* Examine store-store (output) dependences. */
2465 if (vect_debug_details (NULL))
2466 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2468 if (vect_debug_details (NULL))
2469 fprintf (dump_file, "compare all store-store pairs.");
/* All unordered pairs of writes (j starts at i + 1 to avoid checking a
   pair twice or a ref against itself).  */
2471 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2473 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2475 struct data_reference *dra =
2476 VARRAY_GENERIC_PTR (loop_write_refs, i);
2477 struct data_reference *drb =
2478 VARRAY_GENERIC_PTR (loop_write_refs, j);
2479 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2484 /* Examine load-store (true/anti) dependences. */
2486 if (vect_debug_details (NULL))
2487 fprintf (dump_file, "compare all load-store pairs.");
2489 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2491 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2493 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2494 struct data_reference *drb =
2495 VARRAY_GENERIC_PTR (loop_write_refs, j);
2496 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2505 /* Function vect_get_first_index.
2507 REF is a data reference.
2508 If it is an ARRAY_REF: if its lower bound is simple enough,
2509 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2510 If it is not an ARRAY_REF: REF has no "first index";
2511 ARRAY_FIRST_INDEX is zero, and the function returns TRUE. */
2514 vect_get_first_index (tree ref, tree *array_first_index)
/* Non-array refs get a lower bound of zero by definition.  */
2518 if (TREE_CODE (ref) != ARRAY_REF)
2519 *array_first_index = size_zero_node;
2522 array_start = array_ref_low_bound (ref);
/* Only lower bounds that fit in a HOST_WIDE_INT are considered "simple
   enough"; anything else (e.g. a variable bound) is rejected.  */
2523 if (!host_integerp (array_start,0))
2525 if (vect_debug_details (NULL))
2527 fprintf (dump_file, "array min val not simple integer cst.");
2528 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2532 *array_first_index = array_start;
2539 /* Function vect_compute_array_base_alignment.
2540 A utility function of vect_compute_array_ref_alignment.
2542 Compute the misalignment of ARRAY in bits.
2545 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
2546 VECTYPE - we are interested in the misalignment modulo the size of vectype.
2547 if NULL: don't compute misalignment, just return the base of ARRAY.
2548 PREV_DIMENSIONS - initialized to one.
2549 MISALIGNMENT - the computed misalignment in bits.
2552 If VECTYPE is not NULL:
2553 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
2554 the base of the array, and put the computed misalignment in MISALIGNMENT.
2556 Return the base of the array.
2558 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
2559 a[idx_N]...[idx_2][idx_1] is
2560 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
2561 ... + idx_N * dim_0 * ... * dim_N-1}.
2562 (The misalignment of &a is not checked here).
2563 Note, that every term contains dim_0, therefore, if dim_0 is a
2564 multiple of NUNITS, the whole sum is a multiple of NUNITS.
2565 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
2566 NUNITS, we can say that the misalignment of the sum is equal to
2567 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
2568 we can't determine this array misalignment, and we return
2570 We proceed recursively in this manner, accumulating total misalignment
2571 and the multiplication of previous dimensions for correct misalignment
 
     calculation.  */
2575 vect_compute_array_base_alignment (tree array,
2577 tree *prev_dimensions,
2582 tree dimension_size;
2584 tree bits_per_vectype;
2585 tree bits_per_vectype_unit;
2587 /* The 'stop condition' of the recursion. */
2588 if (TREE_CODE (array) != ARRAY_REF)
2592 /* Just get the base decl. */
2593 return vect_compute_array_base_alignment
2594 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
/* If the accumulated values overflowed, the misalignment cannot be
   trusted; bail out.  */
2596 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
2597 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
/* Size of the current dimension: max - min + 1, in elements.  */
2600 domain = TYPE_DOMAIN (TREE_TYPE (array));
2602 int_const_binop (PLUS_EXPR,
2603 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
2604 TYPE_MIN_VALUE (domain), 1),
2607 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
2608 is a multiple of NUNITS:
2610 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
2612 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
2613 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
2614 if (integer_zerop (mis))
2615 /* This array is aligned. Continue just in order to get the base decl. */
2616 return vect_compute_array_base_alignment
2617 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
2619 index = TREE_OPERAND (array, 1);
2620 if (!host_integerp (index, 1))
2621 /* The current index is not constant. */
/* Normalize the index relative to the dimension's lower bound.  */
2624 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
2626 bits_per_vectype = fold_convert (unsigned_type_node,
2627 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2628 GET_MODE_SIZE (TYPE_MODE (vectype))));
2629 bits_per_vectype_unit = fold_convert (unsigned_type_node,
2630 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2631 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
2633 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
2637 (*misalignment + index_val * dimension_size * *prev_dimensions)
2641 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
2642 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
2643 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
2644 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
2645 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
/* Accumulate the product of the dimension sizes seen so far, for use by
   the next (outer) recursion level.  */
2648 *prev_dimensions = int_const_binop (MULT_EXPR,
2649 *prev_dimensions, dimension_size, 1);
/* Recurse towards the base of the multidimensional array ref.  */
2651 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
2657 /* Function vect_compute_data_ref_alignment
2659 Compute the misalignment of the data reference DR.
2662 1. If during the misalignment computation it is found that the data reference
2663 cannot be vectorized then false is returned.
2664 2. DR_MISALIGNMENT (DR) is defined.
2666 FOR NOW: No analysis is actually performed. Misalignment is calculated
2667 only for trivial cases. TODO. */
2670 vect_compute_data_ref_alignment (struct data_reference *dr,
2671 loop_vec_info loop_vinfo)
2673 tree stmt = DR_STMT (dr);
2674 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2675 tree ref = DR_REF (dr);
2678 tree offset = size_zero_node;
2679 tree base, bit_offset, alignment;
2680 tree unit_bits = fold_convert (unsigned_type_node,
2681 build_int_cst (NULL_TREE, BITS_PER_UNIT));
2683 bool base_aligned_p;
2685 if (vect_debug_details (NULL))
2686 fprintf (dump_file, "vect_compute_data_ref_alignment:");
2688 /* Initialize misalignment to unknown. */
2689 DR_MISALIGNMENT (dr) = -1;
/* The vector type for the scalar element type determines the alignment
   boundary we measure against.  */
2691 scalar_type = TREE_TYPE (ref);
2692 vectype = get_vectype_for_scalar_type (scalar_type);
2695 if (vect_debug_details (NULL))
2697 fprintf (dump_file, "no vectype for stmt: ");
2698 print_generic_expr (dump_file, stmt, TDF_SLIM);
2699 fprintf (dump_file, " scalar_type: ");
2700 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
2702 /* It is not possible to vectorize this data reference. */
2705 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
2707 if (TREE_CODE (ref) == ARRAY_REF)
2710 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
/* Compute the ref's base object and its bit offset from that base;
   BASE_ALIGNED_P reports whether the base is already suitably aligned.  */
2712 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
2713 loop_vinfo, &bit_offset, &base_aligned_p);
2716 if (vect_debug_details (NULL))
2718 fprintf (dump_file, "Unknown alignment for access: ");
2719 print_generic_expr (dump_file,
2720 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
2725 if (!base_aligned_p)
2727 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
2729 if (vect_debug_details (NULL))
2731 fprintf (dump_file, "can't force alignment of ref: ");
2732 print_generic_expr (dump_file, ref, TDF_SLIM);
2737 /* Force the alignment of the decl.
2738 NOTE: This is the only change to the code we make during
2739 the analysis phase, before deciding to vectorize the loop. */
2740 if (vect_debug_details (NULL))
2741 fprintf (dump_file, "force alignment");
2742 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
2743 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
2746 /* At this point we assume that the base is aligned, and the offset from it
2747 (including index, if relevant) has been computed and is in BIT_OFFSET. */
2748 gcc_assert (base_aligned_p
2749 || (TREE_CODE (base) == VAR_DECL
2750 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
2752 /* Convert into bytes. */
2753 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
2754 /* Check that there is no remainder in bits. */
2755 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
2756 if (!integer_zerop (bit_offset))
2758 if (vect_debug_details (NULL))
2760 fprintf (dump_file, "bit offset alignment: ");
2761 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
2766 /* Alignment required, in bytes: */
2767 alignment = fold_convert (unsigned_type_node,
2768 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
2770 /* Modulo alignment. */
2771 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
2772 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
2774 if (vect_debug_details (NULL))
2775 fprintf (dump_file, "unexpected misalign value");
/* Record the byte misalignment for later use by the transformation.  */
2779 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
2781 if (vect_debug_details (NULL))
2782 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
2788 /* Function vect_compute_array_ref_alignment
2790 Compute the alignment of an array-ref.
2791 The alignment we compute here is relative to
2792 TYPE_ALIGN(VECTYPE) boundary.
2795 OFFSET - the alignment in bits
2796 Return value - the base of the array-ref. E.g,
2797 if the array-ref is a.b[k].c[i][j] the returned
 
     base is a.b[k].c.  */
2802 vect_compute_array_ref_alignment (struct data_reference *dr,
2803 loop_vec_info loop_vinfo,
2807 tree array_first_index = size_zero_node;
2809 tree ref = DR_REF (dr);
2810 tree scalar_type = TREE_TYPE (ref);
2811 tree oprnd0 = TREE_OPERAND (ref, 0);
2812 tree dims = size_one_node;
2813 tree misalign = size_zero_node;
2814 tree next_ref, this_offset = size_zero_node;
2818 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
2819 /* The reference is an array without its last index. */
2820 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
2823 vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
2825 /* Alignment is not requested. Just return the base. */
2828 /* Compute alignment. */
2829 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
2831 this_offset = misalign;
2833 /* Check the first index accessed. */
2834 if (!vect_get_first_index (ref, &array_first_index))
2836 if (vect_debug_details (NULL))
2837 fprintf (dump_file, "no first_index for array.");
2841 /* Check the index of the array_ref. */
2842 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
2843 LOOP_VINFO_LOOP (loop_vinfo)->num);
2845 /* FORNOW: In order to simplify the handling of alignment, we make sure
2846 that the first location at which the array is accessed ('init') is on an
2847 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
2848 This is too conservative, since we require that
2849 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
2850 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2851 This should be relaxed in the future. */
2853 if (!init || !host_integerp (init, 0))
2855 if (vect_debug_details (NULL))
2856 fprintf (dump_file, "non constant init. ");
2860 /* bytes per scalar element: */
2861 nunits = fold_convert (unsigned_type_node,
2862 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
2863 nbits = int_const_binop (MULT_EXPR, nunits,
2864 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
2866 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
2867 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
2868 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
2869 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
2871 /* TODO: allow negative misalign values. */
2872 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
2874 if (vect_debug_details (NULL))
2875 fprintf (dump_file, "unexpected misalign value");
2883 /* Function vect_compute_data_refs_alignment
2885 Compute the misalignment of data references in the loop.
2886 This pass may take place at function granularity instead of at loop
 
     granularity.
2889 FOR NOW: No analysis is actually performed. Misalignment is calculated
2890 only for trivial cases. TODO. */
2893 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2895 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2896 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
/* Compute misalignment for every write data-ref, then every read.  */
2899 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2901 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2902 vect_compute_data_ref_alignment (dr, loop_vinfo);
2905 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2907 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2908 vect_compute_data_ref_alignment (dr, loop_vinfo);
2913 /* Function vect_enhance_data_refs_alignment
2915 This pass will use loop versioning and loop peeling in order to enhance
2916 the alignment of data references in the loop.
2918 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2919 original loop is to be vectorized; Any other loops that are created by
2920 the transformations performed in this pass - are not supposed to be
2921 vectorized. This restriction will be relaxed.
2923 FOR NOW: No transformation is actually performed. TODO. */
2926 vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
2929 This pass will require a cost model to guide it whether to apply peeling
2930 or versioning or a combination of the two. For example, the scheme that
2931 intel uses when given a loop with several memory accesses, is as follows:
2932 choose one memory access ('p') which alignment you want to force by doing
2933 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
2934 other accesses are not necessarily aligned, or (2) use loop versioning to
2935 generate one loop in which all accesses are aligned, and another loop in
2936 which only 'p' is necessarily aligned.
2938 ("Automatic Intra-Register Vectorization for the Intel Architecture",
2939 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
2940 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
2942 Devising a cost model is the most critical aspect of this work. It will
2943 guide us on which access to peel for, whether to use loop versioning, how
2944 many versions to create, etc. The cost model will probably consist of
2945 generic considerations as well as target specific considerations (on
2946 powerpc for example, misaligned stores are more painful than misaligned
 
     loads).
2949 Here is the general steps involved in alignment enhancements:
2951 -- original loop, before alignment analysis:
2952 for (i=0; i<N; i++){
2953 x = q[i]; # DR_MISALIGNMENT(q) = unknown
2954 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2957 -- After vect_compute_data_refs_alignment:
2958 for (i=0; i<N; i++){
2959 x = q[i]; # DR_MISALIGNMENT(q) = 3
2960 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2963 -- Possibility 1: we do loop versioning:
2965 for (i=0; i<N; i++){ # loop 1A
2966 x = q[i]; # DR_MISALIGNMENT(q) = 3
2967 p[i] = y; # DR_MISALIGNMENT(p) = 0
2971 for (i=0; i<N; i++){ # loop 1B
2972 x = q[i]; # DR_MISALIGNMENT(q) = 3
2973 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
2977 -- Possibility 2: we do loop peeling:
2978 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2982 for (i = 3; i < N; i++){ # loop 2A
2983 x = q[i]; # DR_MISALIGNMENT(q) = 0
2984 p[i] = y; # DR_MISALIGNMENT(p) = unknown
2987 -- Possibility 3: combination of loop peeling and versioning:
2988 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
2993 for (i = 3; i<N; i++){ # loop 3A
2994 x = q[i]; # DR_MISALIGNMENT(q) = 0
2995 p[i] = y; # DR_MISALIGNMENT(p) = 0
2999 for (i = 3; i<N; i++){ # loop 3B
3000 x = q[i]; # DR_MISALIGNMENT(q) = 0
3001 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
3005 These loops are later passed to loop_transform to be vectorized. The
3006 vectorizer will use the alignment information to guide the transformation
3007 (whether to generate regular loads/stores, or with special handling for
 
     misalignment).  */
3013 /* Function vect_analyze_data_refs_alignment
3015 Analyze the alignment of the data-references in the loop.
3016 FOR NOW: Until support for misaligned accesses is in place, only if all
3017 accesses are aligned can the loop be vectorized. This restriction will be
 
     relaxed.  */
3021 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
3023 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3024 /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/
3028 if (vect_debug_details (NULL))
3029 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
3032 /* This pass may take place at function granularity instead of at loop
 
     granularity.  */
3035 vect_compute_data_refs_alignment (loop_vinfo);
3038 /* This pass will use loop versioning and loop peeling in order to enhance
3039 the alignment of data references in the loop.
3040 FOR NOW: we assume that whatever versioning/peeling took place, the
3041 original loop is to be vectorized. Any other loops that were created by
3042 the transformations performed in this pass - are not supposed to be
3043 vectorized. This restriction will be relaxed. */
3045 vect_enhance_data_refs_alignment (loop_vinfo);
3048 /* Finally, check that loop can be vectorized.
3049 FOR NOW: Until support for misaligned accesses is in place, only if all
3050 accesses are aligned can the loop be vectorized. This restriction will be
 
     relaxed.  */
3053 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3055 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3056 if (!aligned_access_p (dr))
3058 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3059 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3060 fprintf (dump_file, "not vectorized: unaligned store.");
3065 /* The vectorizer now supports misaligned loads, so we don't fail anymore
3066 in the presence of a misaligned read dataref. For some targets however
3067 it may be preferable not to vectorize in such a case as misaligned
3068 accesses are very costly. This should be considered in the future. */
3070 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3072 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3073 if (!aligned_access_p (dr))
3075 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3076 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3077 fprintf (dump_file, "not vectorized: unaligned load.");
3087 /* Function vect_analyze_data_ref_access.
3089 Analyze the access pattern of the data-reference DR. For now, a data access
3090 has to be consecutive and aligned to be considered vectorizable. */
3093 vect_analyze_data_ref_access (struct data_reference *dr)
3095 varray_type access_fns = DR_ACCESS_FNS (dr);
3098 unsigned int dimensions, i;
3100 /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
3101 i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
3102 access is contiguous). */
3103 dimensions = VARRAY_ACTIVE_SIZE (access_fns);
3105 for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
3107 access_fn = DR_ACCESS_FN (dr, i);
/* A non-NULL evolution part means this outer index varies in the loop,
   so the access is not contiguous.  */
3109 if (evolution_part_in_loop_num (access_fn,
3110 loop_containing_stmt (DR_STMT (dr))->num))
3112 /* Evolution part is not NULL in this loop (it is neither constant nor
 
   loop invariant).  */
3114 if (vect_debug_details (NULL))
3117 "not vectorized: complicated multidimensional array access.");
3118 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The innermost index must be constant or a simple (unit-step)
   induction for the access to be vectorizable.  */
3124 access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
3125 if (!evolution_function_is_constant_p (access_fn)
3126 && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
3127 access_fn, &init, &step, true))
3129 if (vect_debug_details (NULL))
3131 fprintf (dump_file, "not vectorized: too complicated access function.");
3132 print_generic_expr (dump_file, access_fn, TDF_SLIM);
3141 /* Function vect_analyze_data_ref_accesses.
3143 Analyze the access pattern of all the data references in the loop.
3145 FORNOW: the only access pattern that is considered vectorizable is a
3146 simple step 1 (consecutive) access.
3148 FORNOW: handle only arrays and pointer accesses. */
3151 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
3154 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3155 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3157 if (vect_debug_details (NULL))
3158 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
/* Validate the access pattern of every write, then every read; any
   failure rejects the whole loop.  */
3160 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3162 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3163 bool ok = vect_analyze_data_ref_access (dr);
3166 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3167 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3168 fprintf (dump_file, "not vectorized: complicated access pattern.");
3173 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3175 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3176 bool ok = vect_analyze_data_ref_access (dr);
3179 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3180 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3181 fprintf (dump_file, "not vectorized: complicated access pattern.");
3190 /* Function vect_analyze_pointer_ref_access.
3193 STMT - a stmt that contains a data-ref
3194 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
3196 If the data-ref access is vectorizable, return a data_reference structure
3197 that represents it (DR). Otherwise - return NULL. */
3199 static struct data_reference *
3200 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
3202 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3203 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
/* Characterize the evolution of the pointer itself across iterations.  */
3204 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
3207 tree reftype, innertype;
3208 enum machine_mode innermode;
3209 tree indx_access_fn;
3210 int loopnum = loop->num;
3211 struct data_reference *dr;
3215 if (vect_debug_stats (loop) || vect_debug_details (loop))
3216 fprintf (dump_file, "not vectorized: complicated pointer access.");
3220 if (vect_debug_details (NULL))
3222 fprintf (dump_file, "Access function of ptr: ");
3223 print_generic_expr (dump_file, access_fn, TDF_SLIM);
/* The pointer must evolve as a simple linear induction {init, +, step}.  */
3226 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
3228 if (vect_debug_stats (loop) || vect_debug_details (loop))
3229 fprintf (dump_file, "not vectorized: pointer access is not simple.");
/* The step must be a compile-time constant.  */
3235 if (!host_integerp (step,0))
3237 if (vect_debug_stats (loop) || vect_debug_details (loop))
3239 "not vectorized: non constant step for pointer access.");
3243 step_val = TREE_INT_CST_LOW (step);
3245 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
3246 if (TREE_CODE (reftype) != POINTER_TYPE)
3248 if (vect_debug_stats (loop) || vect_debug_details (loop))
3249 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
3253 reftype = TREE_TYPE (init);
3254 if (TREE_CODE (reftype) != POINTER_TYPE)
3256 if (vect_debug_stats (loop) || vect_debug_details (loop))
3257 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
/* The step must equal the size of the pointed-to element, i.e. the
   access advances by exactly one element per iteration.  */
3261 innertype = TREE_TYPE (reftype);
3262 innermode = TYPE_MODE (innertype);
3263 if (GET_MODE_SIZE (innermode) != step_val)
3265 /* FORNOW: support only consecutive access */
3266 if (vect_debug_stats (loop) || vect_debug_details (loop))
3267 fprintf (dump_file, "not vectorized: non consecutive access.");
/* Build a {0, +, 1} index access function to model the consecutive
   element access, and create the data-ref for it.  */
3272 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
3273 if (vect_debug_details (NULL))
3275 fprintf (dump_file, "Access function of ptr indx: ");
3276 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
3278 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
3283 /* Function vect_get_symbl_and_dr.
3285 The function returns SYMBL - the relevant variable for
3286 memory tag (for aliasing purposes).
3287 Also data reference structure DR is created.
3290 MEMREF - data reference in STMT
3291 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
3294 DR - data_reference struct for MEMREF
3295 return value - the relevant variable for memory tag (for aliasing purposes).
 
     Handles INDIRECT_REF and ARRAY_REF memrefs; anything else is
     reported as unhandled.  */
3300 vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
3301 loop_vec_info loop_vinfo, struct data_reference **dr)
3303 tree symbl, oprnd0, oprnd1;
3304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3306 tree array_base, base;
3307 struct data_reference *new_dr;
3308 bool base_aligned_p;
3311 switch (TREE_CODE (memref))
/* Pointer access: validate it and build its data-ref.  */
3314 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
3318 symbl = DR_BASE_NAME (new_dr);
3319 STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;
3321 switch (TREE_CODE (symbl))
3325 oprnd0 = TREE_OPERAND (symbl, 0);
3326 oprnd1 = TREE_OPERAND (symbl, 1);
3329 /* Only {address_base + offset} expressions are supported,
3330 where address_base can be POINTER_TYPE or ARRAY_TYPE and
3331 offset can be anything but POINTER_TYPE or ARRAY_TYPE.
3332 TODO: swap operands if {offset + address_base}. */
3333 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
3334 && TREE_CODE (oprnd1) != INTEGER_CST)
3335 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
/* The base is itself a pointer: recurse to find the underlying
   symbol (the recursion's DR is discarded; NOTE(review): presumably
   only the symbol matters here — confirm against callers).  */
3338 if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
3341 symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
3342 loop_vinfo, &new_dr);
3346 /* symbl remains unchanged. */
3350 if (vect_debug_details (NULL))
3352 fprintf (dump_file, "unhandled data ref: ");
3353 print_generic_expr (dump_file, memref, TDF_SLIM);
3354 fprintf (dump_file, " (symbl ");
3355 print_generic_expr (dump_file, symbl, TDF_SLIM);
3356 fprintf (dump_file, ") in stmt ");
3357 print_generic_expr (dump_file, stmt, TDF_SLIM);
3364 offset = size_zero_node;
3366 /* Store the array base in the stmt info.
3367 For one dimensional array ref a[i], the base is a,
3368 for multidimensional a[i1][i2]..[iN], the base is
3369 a[i1][i2]..[iN-1]. */
3370 array_base = TREE_OPERAND (memref, 0);
3371 STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;
3373 new_dr = analyze_array (stmt, memref, is_read);
3376 /* Find the relevant symbol for aliasing purposes. */
3377 base = DR_BASE_NAME (new_dr);
3378 switch (TREE_CODE (base))
3385 symbl = TREE_OPERAND (base, 0);
3389 /* Could have recorded more accurate information -
3390 i.e, the actual FIELD_DECL that is being referenced -
3391 but later passes expect VAR_DECL as the nmt. */
3392 symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
3393 loop_vinfo, &offset, &base_aligned_p);
3398 if (vect_debug_details (NULL))
3400 fprintf (dump_file, "unhandled struct/class field access ");
3401 print_generic_expr (dump_file, stmt, TDF_SLIM);
3408 if (vect_debug_details (NULL))
3410 fprintf (dump_file, "unhandled data ref: ");
3411 print_generic_expr (dump_file, memref, TDF_SLIM);
3412 fprintf (dump_file, " in stmt ");
3413 print_generic_expr (dump_file, stmt, TDF_SLIM);
3421 /* Function vect_analyze_data_refs.
3423 Find all the data references in the loop.
3425 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
3426 which base is really an array (not a pointer) and which alignment
3427 can be forced. This restriction will be relaxed. */
3430 vect_analyze_data_refs (loop_vec_info loop_vinfo)
3432 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3433 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3434 int nbbs = loop->num_nodes;
3435 block_stmt_iterator si;
3437 struct data_reference *dr;
3441 if (vect_debug_details (NULL))
3442 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
/* Scan every stmt in every basic block of the loop.  */
3444 for (j = 0; j < nbbs; j++)
3446 basic_block bb = bbs[j];
3447 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3449 bool is_read = false;
3450 tree stmt = bsi_stmt (si);
3451 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3452 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3453 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3454 vuse_optype vuses = STMT_VUSE_OPS (stmt);
3455 varray_type *datarefs = NULL;
3456 int nvuses, nv_may_defs, nv_must_defs;
3460 /* Assumption: there exists a data-ref in stmt, if and only if
3461 it has vuses/vdefs. */
3463 if (!vuses && !v_may_defs && !v_must_defs)
3466 nvuses = NUM_VUSES (vuses);
3467 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
3468 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
/* A stmt with both vuses and vdefs (e.g. a call) is not handled.  */
3470 if (nvuses && (nv_may_defs || nv_must_defs))
3472 if (vect_debug_details (NULL))
3474 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
3475 print_generic_expr (dump_file, stmt, TDF_SLIM);
3480 if (TREE_CODE (stmt) != MODIFY_EXPR)
3482 if (vect_debug_details (NULL))
3484 fprintf (dump_file, "unexpected vops in stmt: ");
3485 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* vuses => read from the RHS; vdefs => write to the LHS.  */
3492 memref = TREE_OPERAND (stmt, 1);
3493 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
3498 memref = TREE_OPERAND (stmt, 0);
3499 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
3503 /* Analyze MEMREF. If it is of a supported form, build data_reference
3504 struct for it (DR) and find the relevant symbol for aliasing
 
   purposes.  */
3506 symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
3509 if (vect_debug_stats (loop) || vect_debug_details (loop))
3511 fprintf (dump_file, "not vectorized: unhandled data ref: ");
3512 print_generic_expr (dump_file, stmt, TDF_SLIM);
3517 /* Find and record the memtag assigned to this data-ref. */
3518 switch (TREE_CODE (symbl))
3521 STMT_VINFO_MEMTAG (stmt_info) = symbl;
/* SSA name: look up the type memory tag of the underlying variable.  */
3525 symbl = SSA_NAME_VAR (symbl);
3526 tag = get_var_ann (symbl)->type_mem_tag;
3529 tree ptr = TREE_OPERAND (memref, 0);
3530 if (TREE_CODE (ptr) == SSA_NAME)
3531 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
3535 if (vect_debug_stats (loop) || vect_debug_details (loop))
3536 fprintf (dump_file, "not vectorized: no memtag for ref.");
3539 STMT_VINFO_MEMTAG (stmt_info) = tag;
/* Address expression: classify by the form of its operand.  */
3543 address_base = TREE_OPERAND (symbl, 0);
3545 switch (TREE_CODE (address_base))
3548 dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
3549 STMT_VINFO_MEMTAG (stmt_info) = DR_BASE_NAME (dr);
3553 STMT_VINFO_MEMTAG (stmt_info) = address_base;
3557 if (vect_debug_stats (loop) || vect_debug_details (loop))
3559 fprintf (dump_file, "not vectorized: unhandled address expression: ");
3560 print_generic_expr (dump_file, stmt, TDF_SLIM);
3567 if (vect_debug_stats (loop) || vect_debug_details (loop))
3569 fprintf (dump_file, "not vectorized: unsupported data-ref: ");
3570 print_generic_expr (dump_file, memref, TDF_SLIM);
/* Record the data-ref in the loop-wide list and in the stmt info.  */
3575 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
3576 STMT_VINFO_DATA_REF (stmt_info) = dr;
3584 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
3586 /* Function vect_mark_relevant.
3588 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
/* Mark STMT as "relevant for vectorization" and push it onto WORKLIST.
   PHI nodes have no stmt_vec_info, so they are pushed unconditionally;
   for ordinary statements the RELEVANT_P flag prevents pushing the same
   statement twice.
   NOTE(review): lossy extraction — `return` statements and braces between
   the numbered lines are missing from this view.  */
3591 vect_mark_relevant (varray_type worklist, tree stmt)
3593 stmt_vec_info stmt_info;
3595 if (vect_debug_details (NULL))
3596 fprintf (dump_file, "mark relevant.");
/* PHI nodes carry no stmt_vec_info; just enqueue them (presumably
   followed by an early return in the missing lines).  */
3598 if (TREE_CODE (stmt) == PHI_NODE)
3600 VARRAY_PUSH_TREE (worklist, stmt);
3604 stmt_info = vinfo_for_stmt (stmt);
/* A statement without vec_info cannot be tracked; report and bail.  */
3608 if (vect_debug_details (NULL))
3610 fprintf (dump_file, "mark relevant: no stmt info!!.");
3611 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Already marked: nothing to do (avoids duplicate worklist entries).  */
3616 if (STMT_VINFO_RELEVANT_P (stmt_info))
3618 if (vect_debug_details (NULL))
3619 fprintf (dump_file, "already marked relevant.");
/* First time we see STMT: set the flag and enqueue it.  */
3623 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
3624 VARRAY_PUSH_TREE (worklist, stmt);
3628 /* Function vect_stmt_relevant_p.
3630 Return true if STMT in loop that is represented by LOOP_VINFO is
3631 "relevant for vectorization".
3633 A stmt is considered "relevant for vectorization" if:
3634 - it has uses outside the loop.
3635 - it has vdefs (it alters memory).
3636 - control stmts in the loop (except for the exit condition).
3638 CHECKME: what other side effects would the vectorizer allow? */
/* Return true iff STMT must be vectorized: it is a control statement other
   than the loop-exit condition, it defines memory (has vdefs), or one of
   its results is used outside the loop represented by LOOP_VINFO.
   NOTE(review): lossy extraction — declarations of `df`, `num_uses`, `i`
   and the `return true`/`return false` lines are missing from this view.  */
3641 vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
3643 v_may_def_optype v_may_defs;
3644 v_must_def_optype v_must_defs;
3645 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3650 /* cond stmt other than loop exit cond. */
3651 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
3654 /* changing memory. */
3655 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3656 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3657 if (v_may_defs || v_must_defs)
3659 if (vect_debug_details (NULL))
3660 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
3664 /* uses outside the loop.  Scan the immediate uses of STMT's result; a
   use in a basic block outside LOOP makes STMT relevant.  */
3665 df = get_immediate_uses (stmt);
3666 num_uses = num_immediate_uses (df);
3667 for (i = 0; i < num_uses; i++)
3669 tree use = immediate_use (df, i);
3670 basic_block bb = bb_for_stmt (use);
3671 if (!flow_bb_inside_loop_p (loop, bb))
3673 if (vect_debug_details (NULL))
3674 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
3683 /* Function vect_mark_stmts_to_be_vectorized.
3685 Not all stmts in the loop need to be vectorized. For example:
3694 Stmt 1 and 3 do not need to be vectorized, because loop control and
3695 addressing of vectorized data-refs are handled differently.
3697 This pass detects such stmts. */
/* Function vect_mark_stmts_to_be_vectorized.

   Classic mark-and-propagate worklist algorithm:
   1) Initialize: clear RELEVANT_P on every statement in the loop, and seed
      the worklist with statements that are inherently relevant
      (vect_stmt_relevant_p).
   2) Propagate: pop a statement, and mark the in-loop defining statements
      of its (non-addressing) uses as relevant, enqueueing them in turn.
   Statements used only for loop control / address computation are thus
   left unmarked.

   NOTE(review): lossy extraction — declarations of `stmt`, `bb`, `ann`,
   `use_ops`, `i`, `j` and the `return true`/`return false` lines are
   missing from this view.  */
3700 vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
3702 varray_type worklist;
3703 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3704 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3705 unsigned int nbbs = loop->num_nodes;
3706 block_stmt_iterator si;
3712 stmt_vec_info stmt_info;
3714 if (vect_debug_details (NULL))
3715 fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");
3717 VARRAY_TREE_INIT (worklist, 64, "work list");
3719 /* 1. Init worklist. */
3721 for (i = 0; i < nbbs; i++)
3723 basic_block bb = bbs[i];
3724 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3726 stmt = bsi_stmt (si);
3728 if (vect_debug_details (NULL))
3730 fprintf (dump_file, "init: stmt relevant? ");
3731 print_generic_expr (dump_file, stmt, TDF_SLIM);
/* Reset the flag so vect_mark_relevant's duplicate check works.  */
3734 stmt_info = vinfo_for_stmt (stmt);
3735 STMT_VINFO_RELEVANT_P (stmt_info) = 0;
3737 if (vect_stmt_relevant_p (stmt, loop_vinfo))
3738 vect_mark_relevant (worklist, stmt);
3743 /* 2. Process_worklist */
3745 while (VARRAY_ACTIVE_SIZE (worklist) > 0)
3747 stmt = VARRAY_TOP_TREE (worklist);
3748 VARRAY_POP (worklist);
3750 if (vect_debug_details (NULL))
3752 fprintf (dump_file, "worklist: examine stmt: ");
3753 print_generic_expr (dump_file, stmt, TDF_SLIM);
3756 /* Examine the USES in this statement. Mark all the statements which
3757 feed this statement's uses as "relevant", unless the USE is used as
   an address computation (see exist_non_indexing_operands_for_use_p).  */
3760 if (TREE_CODE (stmt) == PHI_NODE)
3762 /* follow the def-use chain inside the loop. */
3763 for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
3765 tree arg = PHI_ARG_DEF (stmt, j);
3766 tree def_stmt = NULL_TREE;
/* An argument whose def cannot be analyzed aborts the whole pass
   (presumably returning false in the missing lines).  */
3768 if (!vect_is_simple_use (arg, loop, &def_stmt))
3770 if (vect_debug_details (NULL))
3771 fprintf (dump_file, "worklist: unsupported use.");
3772 varray_clear (worklist);
3778 if (vect_debug_details (NULL))
3780 fprintf (dump_file, "worklist: def_stmt: ");
3781 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
/* Only defs inside the loop participate in vectorization.  */
3784 bb = bb_for_stmt (def_stmt);
3785 if (flow_bb_inside_loop_p (loop, bb))
3786 vect_mark_relevant (worklist, def_stmt);
/* Non-PHI statement: walk its real (scalar) use operands.  */
3790 ann = stmt_ann (stmt);
3791 use_ops = USE_OPS (ann);
3793 for (i = 0; i < NUM_USES (use_ops); i++)
3795 tree use = USE_OP (use_ops, i);
3797 /* We are only interested in uses that need to be vectorized. Uses
3798 that are used for address computation are not considered relevant.
   */
3800 if (exist_non_indexing_operands_for_use_p (use, stmt))
3802 tree def_stmt = NULL_TREE;
3804 if (!vect_is_simple_use (use, loop, &def_stmt))
3806 if (vect_debug_details (NULL))
3807 fprintf (dump_file, "worklist: unsupported use.");
3808 varray_clear (worklist);
3815 if (vect_debug_details (NULL))
3817 fprintf (dump_file, "worklist: examine use %d: ", i);
3818 print_generic_expr (dump_file, use, TDF_SLIM);
3821 bb = bb_for_stmt (def_stmt);
3822 if (flow_bb_inside_loop_p (loop, bb))
3823 vect_mark_relevant (worklist, def_stmt);
3826 } /* while worklist */
3828 varray_clear (worklist);
3833 /* Function vect_get_loop_niters.
3835 Determine how many iterations the loop is executed. */
/* Determine how many iterations LOOP executes.  On success (the iteration
   count is known and fits a host integer) store it in
   *NUMBER_OF_ITERATIONS; otherwise *NUMBER_OF_ITERATIONS is left
   untouched.  Returns the loop exit condition statement.
   NOTE(review): lossy extraction — the declaration of `niters` and
   surrounding braces are missing from this view.  */
3838 vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3842 if (vect_debug_details (NULL))
3843 fprintf (dump_file, "\n<<get_loop_niters>>\n");
/* Ask the scalar-evolution machinery for the iteration count.  */
3845 niters = number_of_iterations_in_loop (loop);
/* Accept only a compile-time-known count that fits in a HOST_WIDE_INT.  */
3847 if (niters != NULL_TREE
3848 && niters != chrec_dont_know
3849 && host_integerp (niters,0))
3851 *number_of_iterations = TREE_INT_CST_LOW (niters);
3853 if (vect_debug_details (NULL))
3854 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3855 *number_of_iterations);
3858 return get_loop_exit_condition (loop);
3862 /* Function vect_analyze_loop_form.
3864 Verify the following restrictions (some may be relaxed in the future):
3865 - it's an inner-most loop
3866 - number of BBs = 2 (which are the loop header and the latch)
3867 - the loop has a pre-header
3868 - the loop has a single entry and exit
3869 - the loop exit condition is simple enough, and the number of iterations
3870 can be analyzed (a countable loop). */
/* Verify LOOP has the restricted shape the vectorizer supports (innermost,
   exactly two BBs, single exit, empty latch, countable with a known
   positive iteration count).  On success allocate and return a
   loop_vec_info recording the exit condition and iteration count; on any
   failure return NULL (presumably — the `return NULL` lines are among the
   lines missing from this lossy extraction, as are the declaration of
   `loop_cond` and the first operand of the condition at line 3883).  */
3872 static loop_vec_info
3873 vect_analyze_loop_form (struct loop *loop)
3875 loop_vec_info loop_vinfo;
3877 HOST_WIDE_INT number_of_iterations = -1;
3879 if (vect_debug_details (loop))
3880 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
/* Reject nested loops, loops with multiple exits, and loops with more
   than header + latch.  */
3883 || !loop->single_exit
3884 || loop->num_nodes != 2)
3886 if (vect_debug_stats (loop) || vect_debug_details (loop))
3888 fprintf (dump_file, "not vectorized: bad loop form. ")
3890 fprintf (dump_file, "nested loop.");
3891 else if (!loop->single_exit)
3892 fprintf (dump_file, "multiple exits.");
3893 else if (loop->num_nodes != 2)
3894 fprintf (dump_file, "too many BBs in loop.");
3900 /* We assume that the loop exit condition is at the end of the loop. i.e,
3901 that the loop is represented as a do-while (with a proper if-guard
3902 before the loop if needed), where the loop header contains all the
3903 executable statements, and the latch is empty. */
3904 if (!empty_block_p (loop->latch))
3906 if (vect_debug_stats (loop) || vect_debug_details (loop))
3907 fprintf (dump_file, "not vectorized: unexpectd loop form.");
/* An empty header means there is nothing to vectorize.  */
3911 if (empty_block_p (loop->header))
3913 if (vect_debug_stats (loop) || vect_debug_details (loop))
3914 fprintf (dump_file, "not vectorized: empty loop.");
/* The loop must be countable: a simple exit condition with an iteration
   count that is known at compile time and positive.  */
3918 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3921 if (vect_debug_stats (loop) || vect_debug_details (loop))
3922 fprintf (dump_file, "not vectorized: complicated exit condition.");
3926 if (number_of_iterations < 0)
3928 if (vect_debug_stats (loop) || vect_debug_details (loop))
3929 fprintf (dump_file, "not vectorized: unknown loop bound.");
3933 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3935 if (vect_debug_stats (loop) || vect_debug_details (loop))
3936 fprintf (dump_file, "not vectorized: number of iterations = 0.");
/* All checks passed: create and fill in the loop_vec_info.  */
3940 loop_vinfo = new_loop_vec_info (loop);
3941 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3942 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3948 /* Function vect_analyze_loop.
3950 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3951 for it. The different analyses will record information in the
3952 loop_vec_info struct. */
/* Driver for the analysis phase: run the analyses on LOOP in order (loop
   form, data refs, relevance marking, scalar cycles, dependences, access
   patterns, alignment, operations).  Each failing analysis destroys the
   loop_vec_info and returns NULL (presumably — the `return NULL` lines
   are among the lines missing from this lossy extraction).  On success
   the loop is flagged vectorizable and the loop_vec_info is returned.  */
3954 static loop_vec_info
3955 vect_analyze_loop (struct loop *loop)
3958 loop_vec_info loop_vinfo;
3960 if (vect_debug_details (NULL))
3961 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3963 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.). */
3965 loop_vinfo = vect_analyze_loop_form (loop);
3968 if (vect_debug_details (loop))
3969 fprintf (dump_file, "bad loop form.");
3973 /* Find all data references in the loop (which correspond to vdefs/vuses)
3974 and analyze their evolution in the loop.
3976 FORNOW: Handle only simple, array references, which
3977 alignment can be forced, and aligned pointer-references. */
3979 ok = vect_analyze_data_refs (loop_vinfo);
3982 if (vect_debug_details (loop))
3983 fprintf (dump_file, "bad data references.");
3984 destroy_loop_vec_info (loop_vinfo);
3988 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3990 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
/* NOTE(review): the next two prints look redundant — both fire on the same
   failure; in other failure arms only one message is emitted.  Possibly the
   first was meant to be removed or guarded by vect_debug_stats; confirm
   against the full original source.  */
3993 if (vect_debug_details (loop))
3994 fprintf (dump_file, "unexpected pattern.");
3995 if (vect_debug_details (loop))
3996 fprintf (dump_file, "not vectorized: unexpected pattern.");
3997 destroy_loop_vec_info (loop_vinfo);
4001 /* Check that all cross-iteration scalar data-flow cycles are OK.
4002 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4004 ok = vect_analyze_scalar_cycles (loop_vinfo);
4007 if (vect_debug_details (loop))
4008 fprintf (dump_file, "bad scalar cycle.");
4009 destroy_loop_vec_info (loop_vinfo);
4013 /* Analyze data dependences between the data-refs in the loop.
4014 FORNOW: fail at the first data dependence that we encounter. */
4016 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4019 if (vect_debug_details (loop))
4020 fprintf (dump_file, "bad data dependence.");
4021 destroy_loop_vec_info (loop_vinfo);
4025 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4026 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4028 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4031 if (vect_debug_details (loop))
4032 fprintf (dump_file, "bad data access.");
4033 destroy_loop_vec_info (loop_vinfo);
4037 /* Analyze the alignment of the data-refs in the loop.
4038 FORNOW: Only aligned accesses are handled. */
4040 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4043 if (vect_debug_details (loop))
4044 fprintf (dump_file, "bad data alignment.");
4045 destroy_loop_vec_info (loop_vinfo);
4049 /* Scan all the operations in the loop and make sure they are
   vectorizable.  */
4052 ok = vect_analyze_operations (loop_vinfo);
4055 if (vect_debug_details (loop))
4056 fprintf (dump_file, "bad operation or unsupported loop bound.");
4057 destroy_loop_vec_info (loop_vinfo);
/* All analyses succeeded: mark the loop as vectorizable.  */
4061 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4067 /* Function need_imm_uses_for.
4069 Return whether we ought to include information for 'var'
4070 when calculating immediate uses. For this pass we only want use
4071 information for non-virtual variables. */
/* Predicate for compute_immediate_uses: collect immediate-use information
   only for non-virtual (GIMPLE register) variables.  */
4074 need_imm_uses_for (tree var)
4076 return is_gimple_reg (var);
4080 /* Function vectorize_loops.
4082 Entry Point to loop vectorization phase. */
/* Entry point of the pass: analyze every initial loop in LOOPS, transform
   the vectorizable ones, then tear down per-loop analysis data and repair
   the SSA form.
   NOTE(review): the function continues past the end of this extracted
   chunk (its closing lines are not visible here); the comments below cover
   only the visible portion.  */
4085 vectorize_loops (struct loops *loops)
4087 unsigned int i, loops_num;
4088 unsigned int num_vectorized_loops = 0;
4090 /* Does the target support SIMD? */
4091 /* FORNOW: until more sophisticated machine modelling is in place. */
4092 if (!UNITS_PER_SIMD_WORD)
4094 if (vect_debug_details (NULL))
4095 fprintf (dump_file, "vectorizer: target vector size is not defined.");
/* Build immediate-use information for real (non-virtual) operands; needed
   by vect_stmt_relevant_p.  */
4099 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
4101 /* ----------- Analyze loops. ----------- */
4103 /* If some loop was duplicated, it gets bigger number
4104 than all previously defined loops. This fact allows us to run
4105 only over initial loops skipping newly generated ones. */
4106 loops_num = loops->num;
4107 for (i = 1; i < loops_num; i++)
4109 loop_vec_info loop_vinfo;
4110 struct loop *loop = loops->parray[i];
/* Analyze, stash the result in loop->aux for the cleanup phase, and
   transform only if every analysis succeeded.  */
4115 loop_vinfo = vect_analyze_loop (loop);
4116 loop->aux = loop_vinfo;
4118 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
4121 vect_transform_loop (loop_vinfo, loops);
4122 num_vectorized_loops++;
4125 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
4126 fprintf (dump_file, "\nvectorized %u loops in function.\n",
4127 num_vectorized_loops);
4129 /* ----------- Finalize. ----------- */
/* Release the per-loop analysis structures stored in loop->aux.  */
4132 for (i = 1; i < loops_num; i++)
4134 struct loop *loop = loops->parray[i];
4135 loop_vec_info loop_vinfo;
4139 loop_vinfo = loop->aux;
4140 destroy_loop_vec_info (loop_vinfo);
/* The transformation created new SSA names; rewrite into valid SSA and,
   if any variables need renaming, restore loop-closed SSA form.  */
4144 rewrite_into_ssa (false);
4145 if (bitmap_first_set_bit (vars_to_rename) >= 0)
4147 /* The rewrite of ssa names may cause violation of loop closed ssa
4148 form invariants. TODO -- avoid these rewrites completely.
4149 Information in virtual phi nodes is sufficient for it. */
4150 rewrite_into_loop_closed_ssa ();
4152 bitmap_clear (vars_to_rename);