1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 #include "coretypes.h"
31 #include "basic-block.h"
32 #include "diagnostic.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
63 /* Utility function dealing with loop peeling (not peeling itself). */
64 static void vect_generate_tmps_on_preheader
65 (loop_vec_info, tree *, tree *, tree *);
66 static tree vect_build_loop_niters (loop_vec_info);
67 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
68 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
69 static void vect_update_init_of_dr (struct data_reference *, tree niters);
70 static void vect_update_inits_of_drs (loop_vec_info, tree);
71 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
72 static void vect_do_peeling_for_loop_bound
73 (loop_vec_info, tree *, struct loops *);
76 /* Function vect_get_new_vect_var.
78 Returns a name for a new variable. The current naming scheme uses the
79 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) as
80 the name of vectorizer generated variables, and prepends that to NAME if
84 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
89 if (var_kind == vect_simple_var)
95 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
97 new_vect_var = create_tmp_var (type, prefix);
103 /* Function vect_create_index_for_vector_ref.
105 Create (and return) an index variable, along with its update chain in the
106 loop. This variable will be used to access a memory location in a vector
110 LOOP: The loop being vectorized.
111 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
112 function can be added here, or in the loop pre-header.
115 Return an index that will be used to index a vector array. It is expected
116 that a pointer to the first vector will be used as the base address for the
119 FORNOW: we are not trying to be efficient, just creating a new index each
120 time from scratch. At this time all vector references could use the same
123 TODO: create only one index to be used by all vector references. Record
124 the index in the LOOP_VINFO the first time this procedure is called and
125 return it on subsequent calls. The increment of this index must be placed
126 just before the conditional expression that ends the single block loop. */
129 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
132 block_stmt_iterator incr_bsi;
134 tree indx_before_incr, indx_after_incr;
135 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
138 /* It is assumed that the base pointer used for vectorized access contains
139 the address of the first vector. Therefore the index used for vectorized
140 access must be initialized to zero and incremented by 1. */
142 init = integer_zero_node;
143 step = integer_one_node;
145 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
146 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
147 &indx_before_incr, &indx_after_incr);
148 incr = bsi_stmt (incr_bsi);
149 get_stmt_operands (incr);
150 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
152 return indx_before_incr;
156 /* Function vect_create_addr_base_for_vector_ref.
158 Create an expression that computes the address of the first memory location
159 that will be accessed for a data reference.
162 STMT: The statement containing the data reference.
163 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
164 OFFSET: Optional. If supplied, it is added to the initial address.
167 1. Return an SSA_NAME whose value is the address of the memory location of
168 the first vector of the data reference.
169 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
170 these statement(s) which define the returned SSA_NAME.
172 FORNOW: We are only handling array accesses with step 1. */
175 vect_create_addr_base_for_vector_ref (tree stmt,
179 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
180 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
182 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
183 tree base_name = build_fold_indirect_ref (data_ref_base);
184 tree ref = DR_REF (dr);
185 tree scalar_type = TREE_TYPE (ref);
186 tree scalar_ptr_type = build_pointer_type (scalar_type);
189 tree addr_base, addr_expr;
191 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
193 /* Create base_offset */
194 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
195 add_referenced_tmp_var (dest);
196 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
197 append_to_statement_list_force (new_stmt, new_stmt_list);
201 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
202 add_referenced_tmp_var (tmp);
203 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
204 STMT_VINFO_VECT_STEP (stmt_info)));
205 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset),
206 base_offset, offset));
207 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
208 append_to_statement_list_force (new_stmt, new_stmt_list);
211 /* base + base_offset */
212 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
215 /* addr_expr = addr_base */
216 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
217 get_name (base_name));
218 add_referenced_tmp_var (addr_expr);
219 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
220 new_temp = make_ssa_name (addr_expr, vec_stmt);
221 TREE_OPERAND (vec_stmt, 0) = new_temp;
222 append_to_statement_list_force (vec_stmt, new_stmt_list);
224 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
226 fprintf (vect_dump, "created ");
227 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
233 /* Function vect_align_data_ref.
235 Handle misalignment of memory accesses.
237 FORNOW: Can't handle misaligned accesses.
238 Make sure that the dataref is aligned. */
241 vect_align_data_ref (tree stmt)
243 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
244 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
246 /* FORNOW: can't handle misaligned accesses;
247 all accesses expected to be aligned. */
248 gcc_assert (aligned_access_p (dr));
252 /* Function vect_create_data_ref_ptr.
254 Create a memory reference expression for vector access, to be used in a
255 vector load/store stmt. The reference is based on a new pointer to vector
259 1. STMT: a stmt that references memory. Expected to be of the form
260 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
261 2. BSI: block_stmt_iterator where new stmts can be added.
262 3. OFFSET (optional): an offset to be added to the initial address accessed
263 by the data-ref in STMT.
264 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
265 pointing to the initial address.
268 1. Declare a new ptr to vector_type, and have it point to the base of the
269 data reference (initial address accessed by the data reference).
270 For example, for vector of type V8HI, the following code is generated:
273 vp = (v8hi *)initial_address;
275 if OFFSET is not supplied:
276 initial_address = &a[init];
277 if OFFSET is supplied:
278 initial_address = &a[init + OFFSET];
280 Return the initial_address in INITIAL_ADDRESS.
282 2. Create a data-reference in the loop based on the new vector pointer vp,
283 and using a new index variable 'idx' as follows:
287 where if ONLY_INIT is true:
290 update = idx * vector_type_size
292 Return the pointer vp'.
295 FORNOW: handle only aligned and consecutive accesses. */
298 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
299 tree *initial_address, bool only_init)
302 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
303 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
304 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
305 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
311 tree new_stmt_list = NULL_TREE;
313 edge pe = loop_preheader_edge (loop);
319 tree type, tmp, size;
321 base_name = build_fold_indirect_ref (unshare_expr (
322 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
324 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
326 tree data_ref_base = base_name;
327 fprintf (vect_dump, "create array_ref of type: ");
328 print_generic_expr (vect_dump, vectype, TDF_SLIM);
329 if (TREE_CODE (data_ref_base) == VAR_DECL)
330 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
331 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
332 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
333 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
334 fprintf (vect_dump, " vectorizing a record based array ref: ");
335 else if (TREE_CODE (data_ref_base) == SSA_NAME)
336 fprintf (vect_dump, " vectorizing a pointer ref: ");
337 print_generic_expr (vect_dump, base_name, TDF_SLIM);
340 /** (1) Create the new vector-pointer variable: **/
342 vect_ptr_type = build_pointer_type (vectype);
343 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
344 get_name (base_name));
345 add_referenced_tmp_var (vect_ptr);
348 /** (2) Add aliasing information to the new vector-pointer:
349 (The points-to info (SSA_NAME_PTR_INFO) may be defined later.) **/
351 tag = STMT_VINFO_MEMTAG (stmt_info);
354 /* If the memory tag of the original reference was not a type tag or
355 if the pointed-to type of VECT_PTR has an alias set number
356 different than TAG's, then we need to create a new type tag for
357 VECT_PTR and add TAG to its alias set. */
358 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG
359 || get_alias_set (tag) != get_alias_set (TREE_TYPE (vect_ptr_type)))
360 add_type_alias (vect_ptr, tag);
362 var_ann (vect_ptr)->type_mem_tag = tag;
364 var_ann (vect_ptr)->subvars = STMT_VINFO_SUBVARS (stmt_info);
366 /** (3) Calculate the initial address of the vector-pointer, and set
367 the vector-pointer to point to it before the loop: **/
369 /* Create: (&(base[init_val+offset]) in the loop preheader. */
370 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
372 pe = loop_preheader_edge (loop);
373 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
374 gcc_assert (!new_bb);
375 *initial_address = new_temp;
377 /* Create: p = (vectype *) initial_base */
378 vec_stmt = fold_convert (vect_ptr_type, new_temp);
379 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
380 new_temp = make_ssa_name (vect_ptr, vec_stmt);
381 TREE_OPERAND (vec_stmt, 0) = new_temp;
382 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
383 gcc_assert (!new_bb);
384 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
387 /** (4) Handle the updating of the vector-pointer inside the loop: **/
389 if (only_init) /* No update in loop is required. */
391 /* Copy the points-to information if it exists. */
392 if (STMT_VINFO_PTR_INFO (stmt_info))
393 duplicate_ssa_name_ptr_info (vect_ptr_init,
394 STMT_VINFO_PTR_INFO (stmt_info));
395 return vect_ptr_init;
398 idx = vect_create_index_for_vector_ref (loop_vinfo);
400 /* Create: update = idx * vectype_size */
401 tmp = create_tmp_var (integer_type_node, "update");
402 add_referenced_tmp_var (tmp);
403 size = TYPE_SIZE (vect_ptr_type);
404 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
405 ptr_update = create_tmp_var (type, "update");
406 add_referenced_tmp_var (ptr_update);
407 vectype_size = TYPE_SIZE_UNIT (vectype);
408 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
409 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
410 new_temp = make_ssa_name (tmp, vec_stmt);
411 TREE_OPERAND (vec_stmt, 0) = new_temp;
412 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
413 vec_stmt = fold_convert (type, new_temp);
414 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
415 new_temp = make_ssa_name (ptr_update, vec_stmt);
416 TREE_OPERAND (vec_stmt, 0) = new_temp;
417 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
419 /* Create: data_ref_ptr = vect_ptr_init + update */
420 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
421 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
422 new_temp = make_ssa_name (vect_ptr, vec_stmt);
423 TREE_OPERAND (vec_stmt, 0) = new_temp;
424 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
425 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
427 /* Copy the points-to information if it exists. */
428 if (STMT_VINFO_PTR_INFO (stmt_info))
429 duplicate_ssa_name_ptr_info (data_ref_ptr, STMT_VINFO_PTR_INFO (stmt_info));
434 /* Function vect_create_destination_var.
436 Create a new temporary of type VECTYPE. */
439 vect_create_destination_var (tree scalar_dest, tree vectype)
442 const char *new_name;
444 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
446 new_name = get_name (scalar_dest);
449 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
450 add_referenced_tmp_var (vec_dest);
456 /* Function vect_init_vector.
458 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
459 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
460 used in the vectorization of STMT. */
463 vect_init_vector (tree stmt, tree vector_var)
465 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
467 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
470 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
476 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
477 add_referenced_tmp_var (new_var);
479 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
480 new_temp = make_ssa_name (new_var, init_stmt);
481 TREE_OPERAND (init_stmt, 0) = new_temp;
483 pe = loop_preheader_edge (loop);
484 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
485 gcc_assert (!new_bb);
487 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
489 fprintf (vect_dump, "created new init_stmt: ");
490 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
493 vec_oprnd = TREE_OPERAND (init_stmt, 0);
498 /* Function vect_get_vec_def_for_operand.
500 OP is an operand in STMT. This function returns a (vector) def that will be
501 used in the vectorized stmt for STMT.
503 In the case that OP is an SSA_NAME which is defined in the loop, then
504 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
506 In case OP is an invariant or constant, a new stmt that creates a vector def
507 needs to be introduced. */
510 vect_get_vec_def_for_operand (tree op, tree stmt)
515 stmt_vec_info def_stmt_info = NULL;
516 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
517 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
518 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
519 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
520 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
527 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
529 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
530 print_generic_expr (vect_dump, op, TDF_SLIM);
533 /** ===> Case 1: operand is a constant. **/
535 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
537 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
541 /* Build a tree with vector elements. */
542 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
543 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
545 for (i = nunits - 1; i >= 0; --i)
547 t = tree_cons (NULL_TREE, op, t);
549 vec_cst = build_vector (vectype, t);
550 return vect_init_vector (stmt, vec_cst);
553 gcc_assert (TREE_CODE (op) == SSA_NAME);
555 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
557 def_stmt = SSA_NAME_DEF_STMT (op);
558 def_stmt_info = vinfo_for_stmt (def_stmt);
560 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
562 fprintf (vect_dump, "vect_get_vec_def_for_operand: def_stmt: ");
563 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
567 /** ==> Case 2.1: operand is defined inside the loop. **/
571 /* Get the def from the vectorized stmt. */
573 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
574 gcc_assert (vec_stmt);
575 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
580 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
581 it is a reduction/induction. **/
583 bb = bb_for_stmt (def_stmt);
584 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
586 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
587 fprintf (vect_dump, "reduction/induction - unsupported.");
588 internal_error ("no support for reduction/induction"); /* FORNOW */
592 /** ==> Case 2.3: operand is defined outside the loop -
593 it is a loop invariant. */
595 switch (TREE_CODE (def_stmt))
598 def = PHI_RESULT (def_stmt);
601 def = TREE_OPERAND (def_stmt, 0);
604 def = TREE_OPERAND (def_stmt, 0);
605 gcc_assert (IS_EMPTY_STMT (def_stmt));
609 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
611 fprintf (vect_dump, "unsupported defining stmt: ");
612 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
614 internal_error ("unsupported defining stmt");
617 /* Build a tree with vector elements.
618 Create 'vec_inv = {inv,inv,..,inv}' */
620 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
621 fprintf (vect_dump, "Create vector_inv.");
623 for (i = nunits - 1; i >= 0; --i)
625 t = tree_cons (NULL_TREE, def, t);
628 vec_inv = build_constructor (vectype, t);
629 return vect_init_vector (stmt, vec_inv);
633 /* Function vect_finish_stmt_generation.
635 Insert a new stmt. */
638 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
640 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
642 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
644 fprintf (vect_dump, "add new stmt: ");
645 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
648 #ifdef ENABLE_CHECKING
649 /* Make sure bsi points to the stmt that is being vectorized. */
650 gcc_assert (stmt == bsi_stmt (*bsi));
653 #ifdef USE_MAPPED_LOCATION
654 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
656 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
661 /* Function vectorizable_assignment.
663 Check if STMT performs an assignment (copy) that can be vectorized.
664 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
665 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
666 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
669 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
675 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
676 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
677 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
680 /* Is vectorizable assignment? */
682 if (TREE_CODE (stmt) != MODIFY_EXPR)
685 scalar_dest = TREE_OPERAND (stmt, 0);
686 if (TREE_CODE (scalar_dest) != SSA_NAME)
689 op = TREE_OPERAND (stmt, 1);
690 if (!vect_is_simple_use (op, loop_vinfo, NULL))
692 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
693 fprintf (vect_dump, "use not simple.");
697 if (!vec_stmt) /* transformation not required. */
699 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
704 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
705 fprintf (vect_dump, "transform assignment.");
708 vec_dest = vect_create_destination_var (scalar_dest, vectype);
711 op = TREE_OPERAND (stmt, 1);
712 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
714 /* Arguments are ready. create the new vector stmt. */
715 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
716 new_temp = make_ssa_name (vec_dest, *vec_stmt);
717 TREE_OPERAND (*vec_stmt, 0) = new_temp;
718 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
724 /* Function vectorizable_operation.
726 Check if STMT performs a binary or unary operation that can be vectorized.
727 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
728 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
729 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
732 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
737 tree op0, op1 = NULL;
738 tree vec_oprnd0, vec_oprnd1=NULL;
739 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
740 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
741 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
744 enum machine_mode vec_mode;
750 /* Is STMT a vectorizable binary/unary operation? */
751 if (TREE_CODE (stmt) != MODIFY_EXPR)
754 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
757 operation = TREE_OPERAND (stmt, 1);
758 code = TREE_CODE (operation);
759 optab = optab_for_tree_code (code, vectype);
761 /* Support only unary or binary operations. */
762 op_type = TREE_CODE_LENGTH (code);
763 if (op_type != unary_op && op_type != binary_op)
765 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
766 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
770 for (i = 0; i < op_type; i++)
772 op = TREE_OPERAND (operation, i);
773 if (!vect_is_simple_use (op, loop_vinfo, NULL))
775 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
776 fprintf (vect_dump, "use not simple.");
781 /* Supportable by target? */
784 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
785 fprintf (vect_dump, "no optab.");
788 vec_mode = TYPE_MODE (vectype);
789 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
791 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
792 fprintf (vect_dump, "op not supported by target.");
796 if (!vec_stmt) /* transformation not required. */
798 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
804 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
805 fprintf (vect_dump, "transform binary/unary operation.");
808 scalar_dest = TREE_OPERAND (stmt, 0);
809 vec_dest = vect_create_destination_var (scalar_dest, vectype);
812 op0 = TREE_OPERAND (operation, 0);
813 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
815 if (op_type == binary_op)
817 op1 = TREE_OPERAND (operation, 1);
818 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
821 /* Arguments are ready. create the new vector stmt. */
823 if (op_type == binary_op)
824 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
825 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
827 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
828 build1 (code, vectype, vec_oprnd0));
829 new_temp = make_ssa_name (vec_dest, *vec_stmt);
830 TREE_OPERAND (*vec_stmt, 0) = new_temp;
831 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
837 /* Function vectorizable_store.
839 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
841 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
842 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
843 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
846 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
852 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
853 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
854 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
855 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
856 enum machine_mode vec_mode;
858 enum dr_alignment_support alignment_support_cheme;
859 v_may_def_optype v_may_defs;
862 /* Is vectorizable store? */
864 if (TREE_CODE (stmt) != MODIFY_EXPR)
867 scalar_dest = TREE_OPERAND (stmt, 0);
868 if (TREE_CODE (scalar_dest) != ARRAY_REF
869 && TREE_CODE (scalar_dest) != INDIRECT_REF)
872 op = TREE_OPERAND (stmt, 1);
873 if (!vect_is_simple_use (op, loop_vinfo, NULL))
875 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
876 fprintf (vect_dump, "use not simple.");
880 vec_mode = TYPE_MODE (vectype);
881 /* FORNOW. In some cases can vectorize even if data-type not supported
882 (e.g. - array initialization with 0). */
883 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
886 if (!STMT_VINFO_DATA_REF (stmt_info))
890 if (!vec_stmt) /* transformation not required. */
892 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
898 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
899 fprintf (vect_dump, "transform store");
901 alignment_support_cheme = vect_supportable_dr_alignment (dr);
902 gcc_assert (alignment_support_cheme);
903 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
905 /* Handle use - get the vectorized def from the defining stmt. */
906 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
909 /* FORNOW: make sure the data reference is aligned. */
910 vect_align_data_ref (stmt);
911 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
912 data_ref = build_fold_indirect_ref (data_ref);
914 /* Arguments are ready. create the new vector stmt. */
915 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
916 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
918 /* Copy the V_MAY_DEFS representing the aliasing of the original array
919 element's definition to the vector's definition then update the
920 defining statement. The original is being deleted so the same
921 SSA_NAMEs can be used. */
922 copy_virtual_operands (*vec_stmt, stmt);
923 v_may_defs = STMT_V_MAY_DEF_OPS (*vec_stmt);
924 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
926 for (i = 0; i < nv_may_defs; i++)
928 tree ssa_name = V_MAY_DEF_RESULT (v_may_defs, i);
929 SSA_NAME_DEF_STMT (ssa_name) = *vec_stmt;
936 /* Function vectorizable_load.
938 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
940 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
941 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
942 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
945 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
948 tree vec_dest = NULL;
949 tree data_ref = NULL;
951 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
952 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
953 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
960 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
961 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
962 edge pe = loop_preheader_edge (loop);
963 enum dr_alignment_support alignment_support_cheme;
965 /* Is vectorizable load? */
967 if (TREE_CODE (stmt) != MODIFY_EXPR)
970 scalar_dest = TREE_OPERAND (stmt, 0);
971 if (TREE_CODE (scalar_dest) != SSA_NAME)
974 op = TREE_OPERAND (stmt, 1);
975 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
978 if (!STMT_VINFO_DATA_REF (stmt_info))
981 mode = (int) TYPE_MODE (vectype);
983 /* FORNOW. In some cases can vectorize even if data-type not supported
984 (e.g. - data copies). */
985 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
987 if (vect_print_dump_info (REPORT_DETAILS, LOOP_LOC (loop_vinfo)))
988 fprintf (vect_dump, "Aligned load, but unsupported type.");
992 if (!vec_stmt) /* transformation not required. */
994 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1000 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1001 fprintf (vect_dump, "transform load.");
1003 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1004 gcc_assert (alignment_support_cheme);
1006 if (alignment_support_cheme == dr_aligned
1007 || alignment_support_cheme == dr_unaligned_supported)
1018 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1019 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1020 if (aligned_access_p (dr))
1021 data_ref = build_fold_indirect_ref (data_ref);
1024 int mis = DR_MISALIGNMENT (dr);
1025 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1026 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1027 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1029 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1030 new_temp = make_ssa_name (vec_dest, new_stmt);
1031 TREE_OPERAND (new_stmt, 0) = new_temp;
1032 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1033 copy_virtual_operands (new_stmt, stmt);
1035 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1039 msq_init = *(floor(p1))
1040 p2 = initial_addr + VS - 1;
1041 magic = have_builtin ? builtin_result : initial_address;
1044 p2' = p2 + indx * vectype_size
1046 vec_dest = realign_load (msq, lsq, magic)
1060 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1061 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1062 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1064 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1065 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1066 new_temp = make_ssa_name (vec_dest, new_stmt);
1067 TREE_OPERAND (new_stmt, 0) = new_temp;
1068 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1069 gcc_assert (!new_bb);
1070 msq_init = TREE_OPERAND (new_stmt, 0);
1071 copy_virtual_operands (new_stmt, stmt);
1072 update_vuses_to_preheader (new_stmt, loop);
1075 /* <2> Create lsq = *(floor(p2')) in the loop */
1076 offset = build_int_cst (integer_type_node,
1077 TYPE_VECTOR_SUBPARTS (vectype));
1078 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1079 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1080 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1081 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1082 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1083 new_temp = make_ssa_name (vec_dest, new_stmt);
1084 TREE_OPERAND (new_stmt, 0) = new_temp;
1085 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1086 lsq = TREE_OPERAND (new_stmt, 0);
1087 copy_virtual_operands (new_stmt, stmt);
1091 if (targetm.vectorize.builtin_mask_for_load)
1093 /* Create permutation mask, if required, in loop preheader. */
1095 params = build_tree_list (NULL_TREE, init_addr);
1096 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1097 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1098 new_stmt = build_function_call_expr (builtin_decl, params);
1099 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1100 new_temp = make_ssa_name (vec_dest, new_stmt);
1101 TREE_OPERAND (new_stmt, 0) = new_temp;
1102 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1103 gcc_assert (!new_bb);
1104 magic = TREE_OPERAND (new_stmt, 0);
1106 /* The result of the CALL_EXPR to this builtin is determined from
1107 the value of the parameter and no global variables are touched
1108 which makes the builtin a "const" function. Requiring the
1109 builtin to have the "const" attribute makes it unnecessary
1110 to call mark_call_clobbered_vars_to_rename. */
1111 gcc_assert (TREE_READONLY (builtin_decl));
1115 /* Use current address instead of init_addr for reduced reg pressure.
1117 magic = dataref_ptr;
1121 /* <4> Create msq = phi <msq_init, lsq> in loop */
1122 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1123 msq = make_ssa_name (vec_dest, NULL_TREE);
1124 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1125 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1126 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1127 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1130 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1131 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1132 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1133 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1134 new_temp = make_ssa_name (vec_dest, new_stmt);
1135 TREE_OPERAND (new_stmt, 0) = new_temp;
1136 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1141 *vec_stmt = new_stmt;
1145 /* Function vect_is_simple_cond.
1148 LOOP_VINFO - vectorization info of the loop that is being vectorized.
1149 COND - Condition that is checked for simple use.
1151 Returns whether a COND can be vectorized. Checks whether
1152 condition operands are supportable using vect_is_simple_use. */
1155 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
/* Only comparison expressions are considered. */
1159 if (TREE_CODE_CLASS (TREE_CODE (cond)) != tcc_comparison)
1162 lhs = TREE_OPERAND (cond, 0);
1163 rhs = TREE_OPERAND (cond, 1);
/* Each operand must be either an SSA_NAME accepted by vect_is_simple_use
   or an INTEGER_CST / REAL_CST constant. */
1165 if (TREE_CODE (lhs) == SSA_NAME)
1167 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1168 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt))
1171 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1174 if (TREE_CODE (rhs) == SSA_NAME)
1176 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1177 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt))
1180 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1186 /* vectorizable_condition.
1188 Check if STMT is a conditional modify expression that can be vectorized.
1189 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1190 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it at BSI.
1193 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1196 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1198 tree scalar_dest = NULL_TREE;
1199 tree vec_dest = NULL_TREE;
1200 tree op = NULL_TREE;
1201 tree cond_expr, then_clause, else_clause;
1202 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1203 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1204 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1205 tree vec_compare, vec_cond_expr;
1207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1208 enum machine_mode vec_mode;
1210 if (!STMT_VINFO_RELEVANT_P (stmt_info))
/* Only a MODIFY_EXPR whose right-hand side is a COND_EXPR is handled. */
1213 if (TREE_CODE (stmt) != MODIFY_EXPR)
1216 op = TREE_OPERAND (stmt, 1);
1218 if (TREE_CODE (op) != COND_EXPR)
1221 cond_expr = TREE_OPERAND (op, 0);
1222 then_clause = TREE_OPERAND (op, 1);
1223 else_clause = TREE_OPERAND (op, 2);
/* The condition must pass vect_is_simple_cond, and each clause must be
   either an SSA_NAME accepted by vect_is_simple_use or an INTEGER_CST /
   REAL_CST constant. */
1225 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
1228 if (TREE_CODE (then_clause) == SSA_NAME)
1230 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
1231 if (!vect_is_simple_use (then_clause, loop_vinfo, &then_def_stmt))
1234 else if (TREE_CODE (then_clause) != INTEGER_CST
1235 && TREE_CODE (then_clause) != REAL_CST)
1238 if (TREE_CODE (else_clause) == SSA_NAME)
1240 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
1241 if (!vect_is_simple_use (else_clause, loop_vinfo, &else_def_stmt))
1244 else if (TREE_CODE (else_clause) != INTEGER_CST
1245 && TREE_CODE (else_clause) != REAL_CST)
1249 vec_mode = TYPE_MODE (vectype);
/* Record the stmt kind; the result is whether expand_vec_cond_expr_p
   accepts OP in VEC_MODE. */
1253 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
1254 return expand_vec_cond_expr_p (op, vec_mode);
1260 scalar_dest = TREE_OPERAND (stmt, 0);
1261 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1263 /* Handle cond expr. */
1265 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt);
1267 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt);
1268 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt);
1269 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt);
1271 /* Arguments are ready. Create the new vector stmt. */
/* The vector comparison reuses the tree code of the scalar condition; the
   VEC_COND_EXPR then selects between the two vectorized clauses. */
1272 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
1273 vec_cond_lhs, vec_cond_rhs);
1274 vec_cond_expr = build (VEC_COND_EXPR, vectype,
1275 vec_compare, vec_then_clause, vec_else_clause);
/* Assign the result to a fresh SSA name of VEC_DEST and emit the stmt. */
1277 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
1278 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1279 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1280 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1285 /* Function vect_transform_stmt.
1287 Create a vectorized stmt to replace STMT, and insert it at BSI.
The vectorizable_* routine selected by STMT_VINFO_TYPE of STMT is called
with the address of a local VEC_STMT, and the stmt it produces is then
recorded in STMT_VINFO_VEC_STMT of STMT. */
1290 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1292 bool is_store = false;
1293 tree vec_stmt = NULL_TREE;
1294 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
/* Dispatch on the kind that was recorded for STMT. */
1297 switch (STMT_VINFO_TYPE (stmt_info))
1299 case op_vec_info_type:
1300 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1304 case assignment_vec_info_type:
1305 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1309 case load_vec_info_type:
1310 done = vectorizable_load (stmt, bsi, &vec_stmt);
1314 case store_vec_info_type:
1315 done = vectorizable_store (stmt, bsi, &vec_stmt);
1320 case condition_vec_info_type:
1321 done = vectorizable_condition (stmt, bsi, &vec_stmt);
1326 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1327 fprintf (vect_dump, "stmt not supported.");
/* Remember the vectorized stmt that now stands for STMT. */
1331 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1337 /* This function builds ni_name = number of iterations loop executes
1338 on the loop preheader.
LOOP_VINFO supplies both the loop and the iteration-count expression
(LOOP_VINFO_NITERS), which is unshared before it is gimplified. */
1341 vect_build_loop_niters (loop_vec_info loop_vinfo)
1343 tree ni_name, stmt, var;
1345 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1346 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
/* Gimplify NI into NI_NAME, using a new temporary named "niters". */
1348 var = create_tmp_var (TREE_TYPE (ni), "niters");
1349 add_referenced_tmp_var (var);
1350 ni_name = force_gimple_operand (ni, &stmt, false, var);
/* Statements produced by the gimplification go on the preheader edge;
   the insertion is asserted not to create a new basic block. */
1352 pe = loop_preheader_edge (loop);
1355 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1356 gcc_assert (!new_bb);
1363 /* This function generates the following statements:
1365 ni_name = number of iterations loop executes
1366 ratio = ni_name / vf
1367 ratio_mult_vf_name = ratio * vf
1369 and places them at the loop preheader edge.
The division and the multiplication are emitted as right and left shifts
by log2 (vf).  The resulting names are stored through NI_NAME_PTR,
RATIO_MULT_VF_NAME_PTR and RATIO_NAME_PTR. */
1372 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
1374 tree *ratio_mult_vf_name_ptr,
1375 tree *ratio_name_ptr)
1383 tree ratio_mult_vf_name;
1384 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1385 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
1386 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
/* NOTE(review): exact_log2 presumably expects VF to be a power of two --
   confirm that the analysis phase guarantees this. */
1387 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
1389 pe = loop_preheader_edge (loop);
1391 /* Generate temporary variable that contains
1392 number of iterations loop executes. */
1394 ni_name = vect_build_loop_niters (loop_vinfo);
1396 /* Create: ratio = ni >> log2(vf) */
1398 var = create_tmp_var (TREE_TYPE (ni), "bnd");
1399 add_referenced_tmp_var (var);
1400 ratio_name = make_ssa_name (var, NULL_TREE);
1401 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
1402 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
1403 SSA_NAME_DEF_STMT (ratio_name) = stmt;
/* Insert on the preheader edge; no new basic block may be created. */
1405 pe = loop_preheader_edge (loop);
1406 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1407 gcc_assert (!new_bb);
1409 /* Create: ratio_mult_vf = ratio << log2 (vf). */
1411 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
1412 add_referenced_tmp_var (var);
1413 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
1414 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
1415 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
1416 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
1418 pe = loop_preheader_edge (loop);
1419 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1420 gcc_assert (!new_bb);
/* Hand the three names back to the caller. */
1422 *ni_name_ptr = ni_name;
1423 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
1424 *ratio_name_ptr = ratio_name;
1430 /* Function update_vuses_to_preheader.
1433 STMT - a statement with potential VUSEs.
1434 LOOP - the loop whose preheader will contain STMT.
1436 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
1437 appears to be defined in a V_MAY_DEF in another statement in a loop.
1438 One such case is when the VUSE is at the dereference of a __restricted__
1439 pointer in a load and the V_MAY_DEF is at the dereference of a different
1440 __restricted__ pointer in a store. Vectorization may result in
1441 copy_virtual_operands being called to copy the problematic VUSE to a new
1442 statement that is being inserted in the loop preheader. This procedure
1443 is called to change the SSA_NAME in the new statement's VUSE from the
1444 SSA_NAME updated in the loop to the related SSA_NAME available on the
1445 path entering the loop.
1447 When this function is called, we have the following situation:
1452 # name1 = phi < name0 , name2>
1457 # name2 = vdef <name1>
1462 Stmt S1 was created in the loop preheader block as part of misaligned-load
1463 handling. This function fixes the name of the vuse of S1 from 'name1' to 'name0'.
1467 update_vuses_to_preheader (tree stmt, struct loop *loop)
1469 basic_block header_bb = loop->header;
1470 edge preheader_e = loop_preheader_edge (loop);
1471 vuse_optype vuses = STMT_VUSE_OPS (stmt);
1472 int nvuses = NUM_VUSES (vuses);
1475 for (i = 0; i < nvuses; i++)
1477 tree ssa_name = VUSE_OP (vuses, i);
1478 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
1479 tree name_var = SSA_NAME_VAR (ssa_name);
1480 basic_block bb = bb_for_stmt (def_stmt);
1482 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
1483 if (!IS_EMPTY_STMT (def_stmt)
1484 && flow_bb_inside_loop_p (loop, bb))
1486 /* If the block containing the statement defining the SSA_NAME
1487 is in the loop then it's necessary to find the definition
1488 outside the loop using the PHI nodes of the header. */
1490 bool updated = false;
/* Find the header PHI for the same underlying variable and use its
   argument on the preheader edge as the new VUSE operand. */
1492 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
1494 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
1496 SET_VUSE_OP (vuses, i,
1497 PHI_ARG_DEF (phi, preheader_e->dest_idx));
/* A matching header PHI is required to have been found. */
1502 gcc_assert (updated);
1508 /* Function vect_update_ivs_after_vectorizer.
1510 "Advance" the induction variables of LOOP to the value they should take
1511 after the execution of LOOP. This is currently necessary because the
1512 vectorizer does not handle induction variables that are used after the
1513 loop. Such a situation occurs when the last iterations of LOOP are peeled, because:
1515 1. We introduced new uses after LOOP for IVs that were not originally used
1516 after LOOP: the IVs of LOOP are now used by an epilog loop.
1517 2. LOOP is going to be vectorized; this means that it will iterate N/VF
1518 times, whereas the loop IVs should be bumped N times.
1521 - LOOP - a loop that is going to be vectorized. The last few iterations
1522 of LOOP were peeled.
1523 - NITERS - the number of iterations that LOOP executes (before it is
1524 vectorized). i.e., the number of times the ivs should be bumped.
1525 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
1526 coming out from LOOP on which there are uses of the LOOP ivs
1527 (this is the path from LOOP->exit to epilog_loop->preheader).
1529 The new definitions of the ivs are placed in LOOP->exit.
1530 The phi args associated with the edge UPDATE_E in the bb
1531 UPDATE_E->dest are updated accordingly.
1533 Assumption 1: Like the rest of the vectorizer, this function assumes
1534 a single loop exit that has a single predecessor.
1536 Assumption 2: The phi nodes in the LOOP header and in update_bb are
1537 organized in the same order.
1539 Assumption 3: The access function of the ivs is simple enough (see
1540 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
1542 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
1543 coming out of LOOP on which the ivs of LOOP are used (this is the path
1544 that leads to the epilog loop; other paths skip the epilog loop). This
1545 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
1546 needs to have its phis updated.
1550 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
1553 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1554 basic_block exit_bb = loop->single_exit->dest;
1556 basic_block update_bb = update_e->dest;
1558 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
1560 /* Make sure there exists a single-predecessor exit bb: */
1561 gcc_assert (single_pred_p (exit_bb));
/* Walk the PHIs of the loop header and of UPDATE_BB in lockstep; this
   relies on Assumption 2 of the function comment. */
1563 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
1565 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
1567 tree access_fn = NULL;
1568 tree evolution_part;
1571 tree var, stmt, ni, ni_name;
1572 block_stmt_iterator last_bsi;
1574 /* Skip virtual phi's. */
1575 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
1577 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1578 fprintf (vect_dump, "virtual phi. skip.");
/* Obtain the scalar evolution of the IV and split it into its step
   (evolution part) and its initial value. */
1582 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
1583 gcc_assert (access_fn);
1585 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
1586 gcc_assert (evolution_part != NULL_TREE);
1588 /* FORNOW: We do not support IVs whose evolution function is a polynomial
1589 of degree >= 2 or exponential. */
1590 gcc_assert (!tree_is_chrec (evolution_part));
1592 step_expr = evolution_part;
1593 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
/* Value of the IV after NITERS iterations: init + niters * step.
   NOTE(review): the product is built in the type of NITERS while the sum
   is built in the type of INIT_EXPR -- confirm the two types agree. */
1596 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
1597 build2 (MULT_EXPR, TREE_TYPE (niters),
1598 niters, step_expr), init_expr);
1600 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
1601 add_referenced_tmp_var (var);
1603 ni_name = force_gimple_operand (ni, &stmt, false, var);
1605 /* Insert stmt into exit_bb. */
1606 last_bsi = bsi_last (exit_bb);
1608 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
1610 /* Fix phi expressions in the successor bb. */
1611 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
1616 /* Function vect_do_peeling_for_loop_bound
1618 Peel the last iterations of the loop represented by LOOP_VINFO.
1619 The peeled iterations form a new epilog loop. Given that the loop now
1620 iterates NITERS times, the new epilog loop iterates
1621 NITERS % VECTORIZATION_FACTOR times.
1623 The original loop will later be made to iterate
1624 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
1627 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
1628 struct loops *loops)
1631 tree ni_name, ratio_mult_vf_name;
1632 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1633 struct loop *new_loop;
1635 basic_block preheader;
1636 #ifdef ENABLE_CHECKING
/* NOTE(review): the dump text below names a function other than this one
   and misspells "transform"; it is runtime output and is left unchanged. */
1640 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1641 fprintf (vect_dump, "=== vect_transtorm_for_unknown_loop_bound ===");
1643 /* Generate the following variables on the preheader of original loop:
1645 ni_name = number of iteration the original loop executes
1646 ratio = ni_name / vf
1647 ratio_mult_vf_name = ratio * vf */
1648 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
1649 &ratio_mult_vf_name, ratio);
1651 #ifdef ENABLE_CHECKING
1652 loop_num = loop->num;
/* Peel at the single exit edge of LOOP, passing RATIO_MULT_VF_NAME and
   NI_NAME as the two iteration counts. */
1654 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
1655 ratio_mult_vf_name, ni_name, false);
1656 #ifdef ENABLE_CHECKING
1657 gcc_assert (new_loop);
1658 gcc_assert (loop_num == loop->num);
1659 slpeel_verify_cfg_after_peeling (loop, new_loop);
1662 /* A guard that controls whether the new_loop is to be executed or skipped
1663 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
1664 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
1665 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
1666 is on the path where the LOOP IVs are used and need to be updated. */
1668 preheader = loop_preheader_edge (new_loop)->src;
1669 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
1670 update_e = EDGE_PRED (preheader, 0);
1672 update_e = EDGE_PRED (preheader, 1);
1674 /* Update IVs of original loop as if they were advanced
1675 by ratio_mult_vf_name steps. */
1676 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
1678 /* After peeling we have to reset scalar evolution analyzer. */
1685 /* Function vect_gen_niters_for_prolog_loop
1687 Set the number of iterations for the loop represented by LOOP_VINFO
1688 to the minimum between LOOP_NITERS (the original iteration count of the loop)
1689 and the misalignment of DR - the data reference recorded in
1690 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
1691 this loop, the data reference DR will refer to an aligned location.
1693 The following computation is generated:
1695 If the misalignment of DR is known at compile time:
1696 addr_mis = int mis = DR_MISALIGNMENT (dr);
1697 Else, compute address misalignment in bytes:
1698 addr_mis = addr & (vectype_size - 1)
1700 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
1702 (elem_size = element type size; an element is the scalar element
1703 whose type is the inner type of the vectype) */
1706 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
1708 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
1709 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1710 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1712 tree iters, iters_name;
1715 tree dr_stmt = DR_STMT (dr);
1716 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
1717 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1718 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
1719 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
1720 tree niters_type = TREE_TYPE (loop_niters);
1722 pe = loop_preheader_edge (loop);
/* A positive LOOP_PEELING_FOR_ALIGNMENT is used directly as the byte
   misalignment, so the peel count is built as a constant.  The element
   size is taken to be the vector alignment in bytes divided by VF. */
1724 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
1726 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
1727 int element_size = vectype_align/vf;
1728 int elem_misalign = byte_misalign / element_size;
1730 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1731 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
1732 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
/* Otherwise the peel count is computed from the address produced by
   vect_create_addr_base_for_vector_ref, using an unsigned integer type
   as wide as the pointer type. */
1736 tree new_stmts = NULL_TREE;
1738 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
1739 tree ptr_type = TREE_TYPE (start_addr);
1740 tree size = TYPE_SIZE (ptr_type);
1741 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
1742 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
1743 tree elem_size_log =
1744 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
1745 tree vf_tree = build_int_cst (unsigned_type_node, vf);
/* The address computation goes on the preheader edge. */
1749 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
1750 gcc_assert (!new_bb);
1752 /* Create: byte_misalign = addr & (vectype_size - 1) */
1754 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
1756 /* Create: elem_misalign = byte_misalign / element_size */
1758 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
1760 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
1761 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
1762 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
1763 iters = fold_convert (niters_type, iters);
1766 /* Create: prolog_loop_niters = min (iters, loop_niters) */
1767 /* If the loop bound is known at compile time we already verified that it is
1768 greater than vf; since the misalignment ('iters') is at most vf, there's
1769 no need to generate the MIN_EXPR in this case. */
1770 if (TREE_CODE (loop_niters) != INTEGER_CST)
1771 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
1773 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1775 fprintf (vect_dump, "niters for prolog loop: ");
1776 print_generic_expr (vect_dump, iters, TDF_SLIM);
/* Gimplify ITERS into ITERS_NAME, using a new temporary named
   "prolog_loop_niters". */
1779 var = create_tmp_var (niters_type, "prolog_loop_niters");
1780 add_referenced_tmp_var (var);
1781 iters_name = force_gimple_operand (iters, &stmt, false, var);
1783 /* Insert stmt on loop preheader edge. */
1786 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
1787 gcc_assert (!new_bb);
1794 /* Function vect_update_init_of_dr
1796 NITERS iterations were peeled from LOOP. DR represents a data reference
1797 in LOOP. This function updates the information recorded in DR to
1798 account for the fact that the first NITERS iterations had already been
1799 executed. Specifically, it updates the OFFSET field of stmt_info
(STMT_VINFO_VECT_INIT_OFFSET of the stmt that contains DR). */
1802 vect_update_init_of_dr (struct data_reference *dr, tree niters)
1804 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1805 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
/* New initial offset = old initial offset + NITERS * step; both
   expressions are folded. */
1807 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
1808 STMT_VINFO_VECT_STEP (stmt_info)));
1809 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
1810 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
1814 /* Function vect_update_inits_of_drs
1816 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
1817 This function updates the information recorded for the data references in
1818 the loop to account for the fact that the first NITERS iterations had
1819 already been executed. Specifically, it calls vect_update_init_of_dr on
1820 every write and every read data reference recorded in LOOP_VINFO. */
1823 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
1826 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
1827 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
/* NOTE(review): this dump guard tests vect_dump and dump_flags directly,
   whereas the other dumps in this file go through vect_print_dump_info --
   confirm whether the difference is intended. */
1829 if (vect_dump && (dump_flags & TDF_DETAILS))
1830 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
/* Stores first ... */
1832 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
1834 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
1835 vect_update_init_of_dr (dr, niters);
/* ... then loads. */
1838 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
1840 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
1841 vect_update_init_of_dr (dr, niters);
1846 /* Function vect_do_peeling_for_alignment
1848 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
1849 'niters' is set to the misalignment of one of the data references in the
1850 loop, thereby forcing it to refer to an aligned location at the beginning
1851 of the execution of this loop. The data reference for which we are
1852 peeling is recorded in LOOP_VINFO_UNALIGNED_DR.
Afterwards LOOP_VINFO_NITERS is reduced by the number of peeled iterations
and the data references of the loop are updated by the same amount. */
1855 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
1857 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1858 tree niters_of_prolog_loop, ni_name;
1860 struct loop *new_loop;
1862 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1863 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
/* Materialize the total iteration count, then derive from it the number
   of iterations the prolog loop has to execute. */
1865 ni_name = vect_build_loop_niters (loop_vinfo);
1866 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
1868 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
1870 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
1871 niters_of_prolog_loop, ni_name, true);
1872 #ifdef ENABLE_CHECKING
1873 gcc_assert (new_loop);
1874 slpeel_verify_cfg_after_peeling (new_loop, loop);
1877 /* Update number of times loop executes. */
1878 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
1879 LOOP_VINFO_NITERS (loop_vinfo) = fold (build2 (MINUS_EXPR,
1880 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop));
1882 /* Update the init conditions of the access functions of all data refs. */
1883 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
1885 /* After peeling we have to reset scalar evolution analyzer. */
1892 /* Function vect_transform_loop.
1894 The analysis phase has determined that the loop is vectorizable.
1895 Vectorize the loop - create vectorized stmts to replace the scalar
1896 stmts in the loop, and update the loop exit condition. */
1899 vect_transform_loop (loop_vec_info loop_vinfo,
1900 struct loops *loops ATTRIBUTE_UNUSED)
1902 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1903 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1904 int nbbs = loop->num_nodes;
1905 block_stmt_iterator si;
1908 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1910 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1911 fprintf (vect_dump, "=== vec_transform_loop ===");
1914 /* Peel the loop if there are data refs with unknown alignment.
1915 Only one data ref with unknown store is allowed. */
1917 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
1918 vect_do_peeling_for_alignment (loop_vinfo, loops);
1920 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
1921 compile time constant), or it is a constant that doesn't divide by the
1922 vectorization factor, then an epilog loop needs to be created.
1923 We therefore duplicate the loop: the original loop will be vectorized,
1924 and will compute the first (n/VF) iterations. The second copy of the loop
1925 will remain scalar and will compute the remaining (n%VF) iterations.
1926 (VF is the vectorization factor). */
1928 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1929 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1930 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
1931 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
1933 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
1934 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
1936 /* 1) Make sure the loop header has exactly two entries
1937 2) Make sure we have a preheader basic block. */
1939 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
1941 loop_split_edge_with (loop_preheader_edge (loop), NULL);
1944 /* FORNOW: the vectorizer supports only loops which body consist
1945 of one basic block (header + empty latch). When the vectorizer will
1946 support more involved loop forms, the order by which the BBs are
1947 traversed need to be reconsidered. */
1949 for (i = 0; i < nbbs; i++)
1951 basic_block bb = bbs[i];
1953 for (si = bsi_start (bb); !bsi_end_p (si);)
1955 tree stmt = bsi_stmt (si);
1956 stmt_vec_info stmt_info;
1959 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1961 fprintf (vect_dump, "------>vectorizing statement: ");
1962 print_generic_expr (vect_dump, stmt, TDF_SLIM);
1964 stmt_info = vinfo_for_stmt (stmt);
1965 gcc_assert (stmt_info);
1966 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1971 #ifdef ENABLE_CHECKING
1972 /* FORNOW: Verify that all stmts operate on the same number of
1973 units and no inner unrolling is necessary. */
1975 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
1976 == vectorization_factor);
1978 /* -------- vectorize statement ------------ */
1979 if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
1980 fprintf (vect_dump, "transform statement.");
1982 is_store = vect_transform_stmt (stmt, &si);
1985 /* free the attached stmt_vec_info and remove the stmt. */
1986 stmt_ann_t ann = stmt_ann (stmt);
1988 set_stmt_info (ann, NULL);
1997 slpeel_make_loop_iterate_ntimes (loop, ratio);
1999 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS, LOOP_LOC (loop_vinfo)))
2000 fprintf (vect_dump, "LOOP VECTORIZED.");