1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
38 #include "tree-data-ref.h"
39 #include "tree-chrec.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 #include "tree-pass.h"
47 /* Utility functions for the code transformation. */
/* Per-statement code generation helpers: creating vectorized defs,
   data-ref pointers/indices, and inserting the generated stmts.  */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
/* Helpers for prolog/epilog peeling: computing loop-bound temporaries,
   updating IVs and data-ref offsets after the vectorized loop.  */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73 static void vect_do_peeling_for_loop_bound
74 (loop_vec_info, tree *, struct loops *);
75 static int vect_min_worthwhile_factor (enum tree_code);
78 /* Function vect_get_new_vect_var.
80 Returns a name for a new variable. The current naming scheme appends the
81 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82 the name of vectorizer generated variables, and appends that to NAME if
86 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
99 case vect_pointer_var:
/* With a caller-supplied NAME, the temporary is "<prefix><name>" ...  */
107 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
/* ... otherwise just the bare prefix is used as the base name.  */
109 new_vect_var = create_tmp_var (type, prefix);
115 /* Function vect_create_index_for_vector_ref.
117 Create (and return) an index variable, along with it's update chain in the
118 loop. This variable will be used to access a memory location in a vector
122 LOOP: The loop being vectorized.
123 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
124 function can be added here, or in the loop pre-header.
127 Return an index that will be used to index a vector array. It is expected
128 that a pointer to the first vector will be used as the base address for the
131 FORNOW: we are not trying to be efficient, just creating a new index each
132 time from scratch. At this time all vector references could use the same
135 TODO: create only one index to be used by all vector references. Record
136 the index in the LOOP_VINFO the first time this procedure is called and
137 return it on subsequent calls. The increment of this index must be placed
138 just before the conditional expression that ends the single block loop. */
141 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
144 block_stmt_iterator incr_bsi;
146 tree indx_before_incr, indx_after_incr;
147 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
150 /* It is assumed that the base pointer used for vectorized access contains
151 the address of the first vector. Therefore the index used for vectorized
152 access must be initialized to zero and incremented by 1. */
154 init = integer_zero_node;
155 step = integer_one_node;
/* Build the IV {0, +, 1} at the loop's canonical increment position.  */
157 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
158 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
159 &indx_before_incr, &indx_after_incr);
160 incr = bsi_stmt (incr_bsi);
/* Attach vectorizer metadata to the increment stmt so later passes of the
   vectorizer recognize it.  */
161 set_stmt_info ((tree_ann_t)stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
/* Return the pre-increment SSA name: the value of the index on entry to
   each iteration.  */
163 return indx_before_incr;
167 /* Function vect_create_addr_base_for_vector_ref.
169 Create an expression that computes the address of the first memory location
170 that will be accessed for a data reference.
173 STMT: The statement containing the data reference.
174 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
175 OFFSET: Optional. If supplied, it is be added to the initial address.
178 1. Return an SSA_NAME whose value is the address of the memory location of
179 the first vector of the data reference.
180 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
181 these statement(s) which define the returned SSA_NAME.
183 FORNOW: We are only handling array accesses with step 1. */
186 vect_create_addr_base_for_vector_ref (tree stmt,
190 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
191 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
/* DR_BASE_ADDRESS/DR_OFFSET/DR_INIT are unshared so that gimplification
   below does not mutate the data-ref analysis trees.  */
192 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
193 tree base_name = build_fold_indirect_ref (data_ref_base);
194 tree ref = DR_REF (dr);
195 tree scalar_type = TREE_TYPE (ref);
196 tree scalar_ptr_type = build_pointer_type (scalar_type);
199 tree addr_base, addr_expr;
201 tree base_offset = unshare_expr (DR_OFFSET (dr));
202 tree init = unshare_expr (DR_INIT (dr));
204 /* Create base_offset */
/* base_offset = DR_OFFSET + DR_INIT, gimplified into NEW_STMT_LIST.  */
205 base_offset = size_binop (PLUS_EXPR, base_offset, init);
206 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
207 add_referenced_tmp_var (dest);
208 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
209 append_to_statement_list_force (new_stmt, new_stmt_list);
/* If the caller passed an extra OFFSET, scale it (presumably by the
   element step -- the multiplier line is not visible here; confirm)
   and fold it into base_offset.  */
213 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
214 add_referenced_tmp_var (tmp);
215 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
217 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
218 base_offset, offset);
219 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
220 append_to_statement_list_force (new_stmt, new_stmt_list);
223 /* base + base_offset */
224 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
227 /* addr_expr = addr_base */
/* Materialize the address into a fresh "vect_p*" SSA name and queue the
   defining stmt on NEW_STMT_LIST for the caller to insert.  */
228 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
229 get_name (base_name));
230 add_referenced_tmp_var (addr_expr);
231 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
232 new_temp = make_ssa_name (addr_expr, vec_stmt);
233 TREE_OPERAND (vec_stmt, 0) = new_temp;
234 append_to_statement_list_force (vec_stmt, new_stmt_list);
236 if (vect_print_dump_info (REPORT_DETAILS))
238 fprintf (vect_dump, "created ");
239 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
245 /* Function vect_align_data_ref.
247 Handle misalignment of a memory accesses.
249 FORNOW: Can't handle misaligned accesses.
250 Make sure that the dataref is aligned. */
253 vect_align_data_ref (tree stmt)
255 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
256 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
258 /* FORNOW: can't handle misaligned accesses;
259 all accesses expected to be aligned. */
/* Pure sanity check: no code is generated here; misaligned data-refs
   should have been rejected (or peeled for) during analysis.  */
260 gcc_assert (aligned_access_p (dr));
264 /* Function vect_create_data_ref_ptr.
266 Create a memory reference expression for vector access, to be used in a
267 vector load/store stmt. The reference is based on a new pointer to vector
271 1. STMT: a stmt that references memory. Expected to be of the form
272 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
273 2. BSI: block_stmt_iterator where new stmts can be added.
274 3. OFFSET (optional): an offset to be added to the initial address accessed
275 by the data-ref in STMT.
276 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
277 pointing to the initial address.
280 1. Declare a new ptr to vector_type, and have it point to the base of the
281 data reference (initial addressed accessed by the data reference).
282 For example, for vector of type V8HI, the following code is generated:
285 vp = (v8hi *)initial_address;
287 if OFFSET is not supplied:
288 initial_address = &a[init];
289 if OFFSET is supplied:
290 initial_address = &a[init + OFFSET];
292 Return the initial_address in INITIAL_ADDRESS.
294 2. Create a data-reference in the loop based on the new vector pointer vp,
295 and using a new index variable 'idx' as follows:
299 where if ONLY_INIT is true:
302 update = idx + vector_type_size
304 Return the pointer vp'.
307 FORNOW: handle only aligned and consecutive accesses. */
310 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
311 tree *initial_address, bool only_init)
314 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
315 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
317 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
323 tree new_stmt_list = NULL_TREE;
325 edge pe = loop_preheader_edge (loop);
331 tree type, tmp, size;
332 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
/* base_name is the object referenced by the data-ref's base address;
   used only for naming the new pointer and for dump output.  */
334 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
336 if (vect_print_dump_info (REPORT_DETAILS))
338 tree data_ref_base = base_name;
339 fprintf (vect_dump, "create array_ref of type: ");
340 print_generic_expr (vect_dump, vectype, TDF_SLIM);
341 if (TREE_CODE (data_ref_base) == VAR_DECL)
342 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
343 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
344 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
345 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
346 fprintf (vect_dump, " vectorizing a record based array ref: ");
347 else if (TREE_CODE (data_ref_base) == SSA_NAME)
348 fprintf (vect_dump, " vectorizing a pointer ref: ");
349 print_generic_expr (vect_dump, base_name, TDF_SLIM);
352 /** (1) Create the new vector-pointer variable: **/
354 vect_ptr_type = build_pointer_type (vectype);
355 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
356 get_name (base_name));
357 add_referenced_tmp_var (vect_ptr);
360 /** (2) Add aliasing information to the new vector-pointer:
361 (The points-to info (DR_PTR_INFO) may be defined later.) **/
363 tag = DR_MEMTAG (dr);
366 /* If tag is a variable (and NOT_A_TAG) than a new type alias
367 tag must be created with tag added to its may alias list. */
368 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
369 new_type_alias (vect_ptr, tag);
371 var_ann (vect_ptr)->type_mem_tag = tag;
373 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
375 /** (3) Calculate the initial address the vector-pointer, and set
376 the vector-pointer to point to it before the loop: **/
378 /* Create: (&(base[init_val+offset]) in the loop preheader. */
379 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
/* The address computation stmts go on the preheader edge; the edge must
   be empty enough that no new basic block is created (asserted).  */
381 pe = loop_preheader_edge (loop);
382 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
383 gcc_assert (!new_bb);
384 *initial_address = new_temp;
386 /* Create: p = (vectype *) initial_base */
387 vec_stmt = fold_convert (vect_ptr_type, new_temp);
388 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
389 new_temp = make_ssa_name (vect_ptr, vec_stmt);
390 TREE_OPERAND (vec_stmt, 0) = new_temp;
391 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
392 gcc_assert (!new_bb);
393 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
396 /** (4) Handle the updating of the vector-pointer inside the loop: **/
398 if (only_init) /* No update in loop is required. */
400 /* Copy the points-to information if it exists. */
401 if (DR_PTR_INFO (dr))
402 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
403 return vect_ptr_init;
/* Otherwise build, inside the loop body at BSI:
     update       = idx * sizeof (vectype)
     ptr_update   = (pointer-sized int) update
     data_ref_ptr = vect_ptr_init + ptr_update  */
406 idx = vect_create_index_for_vector_ref (loop_vinfo);
408 /* Create: update = idx * vectype_size */
409 tmp = create_tmp_var (integer_type_node, "update");
410 add_referenced_tmp_var (tmp);
/* Pick an integer type as wide as the pointer for the byte offset.  */
411 size = TYPE_SIZE (vect_ptr_type);
412 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
413 ptr_update = create_tmp_var (type, "update");
414 add_referenced_tmp_var (ptr_update);
415 vectype_size = TYPE_SIZE_UNIT (vectype);
416 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
417 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
418 new_temp = make_ssa_name (tmp, vec_stmt);
419 TREE_OPERAND (vec_stmt, 0) = new_temp;
420 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
421 vec_stmt = fold_convert (type, new_temp);
422 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
423 new_temp = make_ssa_name (ptr_update, vec_stmt);
424 TREE_OPERAND (vec_stmt, 0) = new_temp;
425 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
427 /* Create: data_ref_ptr = vect_ptr_init + update */
428 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
429 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
430 new_temp = make_ssa_name (vect_ptr, vec_stmt);
431 TREE_OPERAND (vec_stmt, 0) = new_temp;
432 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
433 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
435 /* Copy the points-to information if it exists. */
436 if (DR_PTR_INFO (dr))
437 duplicate_ssa_name_ptr_info (data_ref_ptr, DR_PTR_INFO (dr));
442 /* Function vect_create_destination_var.
444 Create a new temporary of type VECTYPE. */
/* If VECTYPE is NULL, a scalar temporary of SCALAR_DEST's type is
   created instead.  SCALAR_DEST must be an SSA_NAME; its base name is
   reused for the new variable.  */
447 vect_create_destination_var (tree scalar_dest, tree vectype)
450 const char *new_name;
452 enum vect_var_kind kind;
454 kind = vectype ? vect_simple_var : vect_scalar_var;
455 type = vectype ? vectype : TREE_TYPE (scalar_dest);
457 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
459 new_name = get_name (scalar_dest);
/* NOTE(review): KIND is computed above but never used -- the call below
   hard-codes vect_simple_var.  Looks like KIND was intended here;
   confirm before changing (affects only the "vect_" vs other prefix
   of the generated name).  */
462 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
463 add_referenced_tmp_var (vec_dest);
469 /* Function vect_init_vector.
471 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
472 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
473 used in the vectorization of STMT. */
476 vect_init_vector (tree stmt, tree vector_var)
478 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
479 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
480 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
483 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
/* Build: vect_cst_* = VECTOR_VAR as a fresh SSA name.  */
489 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
490 add_referenced_tmp_var (new_var);
492 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
493 new_temp = make_ssa_name (new_var, init_stmt);
494 TREE_OPERAND (init_stmt, 0) = new_temp;
/* The initialization is loop-invariant, so it goes on the preheader
   edge; the insertion must not create a new basic block (asserted).  */
496 pe = loop_preheader_edge (loop);
497 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
498 gcc_assert (!new_bb);
500 if (vect_print_dump_info (REPORT_DETAILS))
502 fprintf (vect_dump, "created new init_stmt: ");
503 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
506 vec_oprnd = TREE_OPERAND (init_stmt, 0);
511 /* Function vect_get_vec_def_for_operand.
513 OP is an operand in STMT. This function returns a (vector) def that will be
514 used in the vectorized stmt for STMT.
516 In the case that OP is an SSA_NAME which is defined in the loop, then
517 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
519 In case OP is an invariant or constant, a new stmt that creates a vector def
520 needs to be introduced. */
523 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
528 stmt_vec_info def_stmt_info = NULL;
529 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
530 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
531 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
532 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
533 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
539 enum vect_def_type dt;
542 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
545 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP (constant / invariant / internal def / reduction /
   induction).  The analysis phase already validated this, hence the
   assert rather than an error path.  */
548 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
549 gcc_assert (is_simple_use);
550 if (vect_print_dump_info (REPORT_DETAILS))
554 fprintf (vect_dump, "def = ");
555 print_generic_expr (vect_dump, def, TDF_SLIM);
559 fprintf (vect_dump, " def_stmt = ");
560 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
566 /* Case 1: operand is a constant. */
567 case vect_constant_def:
572 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
573 if (vect_print_dump_info (REPORT_DETAILS))
574 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
576 for (i = nunits - 1; i >= 0; --i)
578 t = tree_cons (NULL_TREE, op, t);
/* A VECTOR_CST of NUNITS copies of the scalar constant, materialized
   in the preheader by vect_init_vector.  */
580 vec_cst = build_vector (vectype, t);
581 return vect_init_vector (stmt, vec_cst);
584 /* Case 2: operand is defined outside the loop - loop invariant. */
585 case vect_invariant_def:
590 /* Create 'vec_inv = {inv,inv,..,inv}' */
591 if (vect_print_dump_info (REPORT_DETAILS))
592 fprintf (vect_dump, "Create vector_inv.");
594 for (i = nunits - 1; i >= 0; --i)
596 t = tree_cons (NULL_TREE, def, t);
599 /* FIXME: use build_constructor directly. */
/* Invariants need a CONSTRUCTOR (not VECTOR_CST) since the value is
   not known at compile time.  */
600 vec_inv = build_constructor_from_list (vectype, t);
601 return vect_init_vector (stmt, vec_inv);
604 /* Case 3: operand is defined inside the loop. */
608 *scalar_def = def_stmt;
610 /* Get the def from the vectorized stmt. */
611 def_stmt_info = vinfo_for_stmt (def_stmt);
612 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
613 gcc_assert (vec_stmt);
614 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
618 /* Case 4: operand is defined by a loop header phi - reduction */
619 case vect_reduction_def:
621 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
623 /* Get the def before the loop */
624 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
625 return get_initial_def_for_reduction (stmt, op, scalar_def);
628 /* Case 5: operand is defined by loop-header phi - induction. */
629 case vect_induction_def:
631 if (vect_print_dump_info (REPORT_DETAILS))
632 fprintf (vect_dump, "induction - unsupported.");
633 internal_error ("no support for induction"); /* FORNOW */
642 /* Function vect_finish_stmt_generation.
644 Insert a new stmt. */
/* Inserts VEC_STMT just before STMT (at BSI) and copies STMT's source
   location onto it so diagnostics/debug info point at the original.  */
647 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
649 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
651 if (vect_print_dump_info (REPORT_DETAILS))
653 fprintf (vect_dump, "add new stmt: ");
654 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
657 /* Make sure bsi points to the stmt that is being vectorized. */
658 gcc_assert (stmt == bsi_stmt (*bsi));
660 #ifdef USE_MAPPED_LOCATION
661 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
663 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
/* Selects Option1 below: always start the partial-sums vector at zero
   and fold INIT_VAL back in at the epilog.  */
668 #define ADJUST_IN_EPILOG 1
670 /* Function get_initial_def_for_reduction
673 STMT - a stmt that performs a reduction operation in the loop.
674 INIT_VAL - the initial value of the reduction variable
677 SCALAR_DEF - a tree that holds a value to be added to the final result
678 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
679 Return a vector variable, initialized according to the operation that STMT
680 performs. This vector will be used as the initial value of the
681 vector of partial results.
683 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
686 min/max: [init_val,init_val,..,init_val,init_val]
687 bit and/or: [init_val,init_val,..,init_val,init_val]
688 and when necessary (e.g. add/mult case) let the caller know
689 that it needs to adjust the result by init_val.
691 Option2: Initialize the vector as follows:
692 add: [0,0,...,0,init_val]
693 mult: [1,1,...,1,init_val]
694 min/max: [init_val,init_val,...,init_val]
695 bit and/or: [init_val,init_val,...,init_val]
696 and no adjustments are needed.
698 For example, for the following code:
704 STMT is 's = s + a[i]', and the reduction variable is 's'.
705 For a vector of 4 units, we want to return either [0,0,0,init_val],
706 or [0,0,0,0] and let the caller know that it needs to adjust
707 the result at the end by 'init_val'.
709 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
710 TODO: Use some cost-model to estimate which scheme is more profitable.
714 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
716 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
717 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
718 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
720 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
721 tree type = TREE_TYPE (init_val);
723 tree vec, t = NULL_TREE;
724 bool need_epilog_adjust;
/* Only integral and scalar-float reductions are supported.  */
727 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
/* DEF is the neutral filler element (0 here; visible branches below
   build integer or real zero).  */
732 if (INTEGRAL_TYPE_P (type))
733 def = build_int_cst (type, 0);
735 def = build_real (type, dconst0);
737 #ifdef ADJUST_IN_EPILOG
738 /* All the 'nunits' elements are set to 0. The final result will be
739 adjusted by 'init_val' at the loop epilog. */
741 need_epilog_adjust = true;
743 /* 'nunits - 1' elements are set to 0; The last element is set to
744 'init_val'. No further adjustments at the epilog are needed. */
745 nelements = nunits - 1;
746 need_epilog_adjust = false;
754 need_epilog_adjust = true;
/* Build the element list back-to-front (tree_cons prepends).  */
761 for (i = nelements - 1; i >= 0; --i)
762 t = tree_cons (NULL_TREE, def, t);
764 if (nelements == nunits - 1)
766 /* Set the last element of the vector. */
767 t = tree_cons (NULL_TREE, init_val, t);
770 gcc_assert (nelements == nunits);
/* Constant elements allow a VECTOR_CST; otherwise a CONSTRUCTOR.  */
772 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
773 vec = build_vector (vectype, t);
775 vec = build_constructor_from_list (vectype, t);
/* When INIT_VAL is already folded into the vector, report zero to the
   caller so its epilog adjustment becomes a no-op.  */
777 if (!need_epilog_adjust)
779 if (INTEGRAL_TYPE_P (type))
780 init_val = build_int_cst (type, 0);
782 init_val = build_real (type, dconst0);
784 *scalar_def = init_val;
786 return vect_init_vector (stmt, vec);
790 /* Function vect_create_epilog_for_reduction:
792 Create code at the loop-epilog to finalize the result of a reduction
795 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
796 into a single result, by applying the operation REDUC_CODE on the
797 partial-results-vector. For this, we need to create a new phi node at the
798 loop exit to preserve loop-closed form, as illustrated below.
800 STMT is the original scalar reduction stmt that is being vectorized.
801 REDUCTION_OP is the scalar reduction-variable.
802 REDUCTION_PHI is the phi-node that carries the reduction computation.
803 This function also sets the arguments for the REDUCTION_PHI:
804 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
805 The loop-latch argument is VECT_DEF - the vector of partial sums.
807 This function transforms this:
810 vec_def = phi <null, null> # REDUCTION_PHI
815 s_out0 = phi <s_loop> # EXIT_PHI
823 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
828 s_out0 = phi <s_loop> # EXIT_PHI
829 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
831 v_out2 = reduc_expr <v_out1>
832 s_out3 = extract_field <v_out2, 0>
839 vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
840 enum tree_code reduc_code, tree reduction_phi)
842 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
843 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
844 enum machine_mode mode = TYPE_MODE (vectype);
845 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
846 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
848 tree scalar_dest = TREE_OPERAND (stmt, 0);
849 tree scalar_type = TREE_TYPE (scalar_dest);
851 block_stmt_iterator exit_bsi;
856 tree new_scalar_dest, exit_phi;
857 tree bitsize, bitpos, bytesize;
858 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
859 tree scalar_initial_def;
860 tree vec_initial_def;
862 imm_use_iterator imm_iter;
864 bool extract_scalar_result;
865 bool adjust_in_epilog;
867 /*** 1. Create the reduction def-use cycle ***/
869 /* 1.1 set the loop-entry arg of the reduction-phi: */
870 /* For the case of reduction, vect_get_vec_def_for_operand returns
871 the scalar def before the loop, that defines the initial value
872 of the reduction variable. */
873 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
874 &scalar_initial_def);
875 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
878 /* 1.2 set the loop-latch arg for the reduction-phi: */
879 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
881 if (vect_print_dump_info (REPORT_DETAILS))
883 fprintf (vect_dump, "transform reduction: created def-use cycle:");
884 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
885 fprintf (vect_dump, "\n");
886 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
890 /*** 2. Create epilog code ***/
892 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
893 v_out1 = phi <v_loop> */
/* Assumes a single-exit loop (loop->single_exit).  */
895 exit_bb = loop->single_exit->dest;
896 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
897 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
899 exit_bsi = bsi_start (exit_bb);
902 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
903 bitsize = TYPE_SIZE (scalar_type);
904 bytesize = TYPE_SIZE_UNIT (scalar_type);
906 /* 2.2 Create the reduction code. */
/* Three strategies, best-first: (a) a target reduction insn
   (REDUC_CODE is a real tree code), (b) log2(nunits) whole-vector
   shifts + vector ops, (c) scalar extraction of every element.  */
908 if (reduc_code < NUM_TREE_CODES)
911 v_out2 = reduc_expr <v_out1> */
913 if (vect_print_dump_info (REPORT_DETAILS))
914 fprintf (vect_dump, "Reduce using direct vector reduction.");
916 vec_dest = vect_create_destination_var (scalar_dest, vectype);
917 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
918 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
919 new_temp = make_ssa_name (vec_dest, epilog_stmt);
920 TREE_OPERAND (epilog_stmt, 0) = new_temp;
921 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
923 extract_scalar_result = true;
924 adjust_in_epilog = true;
928 enum tree_code shift_code = 0;
929 bool have_whole_vector_shift = true;
930 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
932 int element_bitsize = tree_low_cst (bitsize, 1);
933 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
936 /* The result of the reduction is expected to be at the least
937 significant bits of the vector. This is merely convention,
938 as it's the extraction later that really matters, and that
939 is also under our control. */
940 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
941 shift_code = VEC_RSHIFT_EXPR;
943 have_whole_vector_shift = false;
945 /* Regardless of whether we have a whole vector shift, if we're
946 emulating the operation via tree-vect-generic, we don't want
947 to use it. Only the first round of the reduction is likely
948 to still be profitable via emulation. */
949 /* ??? It might be better to emit a reduction tree code here, so that
950 tree-vect-generic can expand the first round via bit tricks. */
951 if (!VECTOR_MODE_P (mode))
952 have_whole_vector_shift = false;
955 optab optab = optab_for_tree_code (code, vectype);
956 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
957 have_whole_vector_shift = false;
960 if (have_whole_vector_shift)
963 for (offset = VS/2; offset >= element_size; offset/=2)
965 Create: va' = vec_shift <va, offset>
966 Create: va = vop <va, va'>
969 if (vect_print_dump_info (REPORT_DETAILS))
970 fprintf (vect_dump, "Reduce using vector shifts");
972 vec_dest = vect_create_destination_var (scalar_dest, vectype);
973 new_temp = PHI_RESULT (new_phi);
/* Halve the live portion of the vector each round: shift the upper
   half down and combine with the lower half using CODE.  */
975 for (bit_offset = vec_size_in_bits/2;
976 bit_offset >= element_bitsize;
979 tree bitpos = size_int (bit_offset);
981 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
982 build2 (shift_code, vectype, new_temp, bitpos));
983 new_name = make_ssa_name (vec_dest, epilog_stmt);
984 TREE_OPERAND (epilog_stmt, 0) = new_name;
985 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
986 if (vect_print_dump_info (REPORT_DETAILS))
987 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
990 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
991 build2 (code, vectype, new_name, new_temp));
992 new_temp = make_ssa_name (vec_dest, epilog_stmt);
993 TREE_OPERAND (epilog_stmt, 0) = new_temp;
994 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
995 if (vect_print_dump_info (REPORT_DETAILS))
996 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
999 extract_scalar_result = true;
1000 adjust_in_epilog = true;
1006 for (offset=0; offset<vector_size; offset+=element_size;)
1008 Create: s' = extract_field <v_out2, offset>
1009 Create: s = op <s, s'>
1012 if (vect_print_dump_info (REPORT_DETAILS))
1013 fprintf (vect_dump, "Reduce using scalar code. ");
1015 vec_temp = PHI_RESULT (new_phi);
1016 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1018 /* first iteration is peeled out when possible to minimize
1019 the number of operations we generate: */
/* For PLUS with a zero initial value, element 0 seeds the accumulator
   directly, saving one add.  */
1020 if (code == PLUS_EXPR
1021 && (integer_zerop (scalar_initial_def)
1022 || real_zerop (scalar_initial_def)))
1024 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1025 build3 (BIT_FIELD_REF, scalar_type,
1026 vec_temp, bitsize, bitsize_zero_node));
1027 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1028 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1029 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1030 if (vect_print_dump_info (REPORT_DETAILS))
1031 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1033 bit_offset = element_bitsize;
1037 new_temp = scalar_initial_def;
/* Extract each remaining element with BIT_FIELD_REF and fold it into
   the running scalar accumulator.  */
1042 bit_offset < vec_size_in_bits;
1043 bit_offset += element_bitsize)
1045 tree bitpos = bitsize_int (bit_offset);
1047 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1048 build3 (BIT_FIELD_REF, scalar_type,
1049 vec_temp, bitsize, bitpos));
1050 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1051 TREE_OPERAND (epilog_stmt, 0) = new_name;
1052 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1053 if (vect_print_dump_info (REPORT_DETAILS))
1054 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1057 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1058 build2 (code, scalar_type, new_name, new_temp));
1059 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1060 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1061 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1062 if (vect_print_dump_info (REPORT_DETAILS))
1063 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
/* The scalar strategy already yields a scalar including the initial
   value; no extraction or epilog adjustment left to do.  */
1066 extract_scalar_result = false;
1067 adjust_in_epilog = false;
1072 /* 2.3 Extract the final scalar result. Create:
1073 s_out3 = extract_field <v_out2, bitpos> */
1075 if (extract_scalar_result)
1077 if (vect_print_dump_info (REPORT_DETAILS))
1078 fprintf (vect_dump, "extract scalar result");
1080 /* The result is in the low order bits. */
/* On big-endian targets the "low order" element sits at the highest
   bit position of the vector.  */
1081 if (BITS_BIG_ENDIAN)
1082 bitpos = size_binop (MULT_EXPR,
1083 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1084 TYPE_SIZE (scalar_type));
1086 bitpos = bitsize_zero_node;
1088 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1089 build3 (BIT_FIELD_REF, scalar_type,
1090 new_temp, bitsize, bitpos));
1091 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1092 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1093 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1094 if (vect_print_dump_info (REPORT_DETAILS))
1095 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1099 /* 2.4 Adjust the final result by the initial value of the reduction
1100 variable. (when such adjustment is not needed, then
1101 'scalar_initial_def' is zero).
1104 s_out = scalar_expr <s_out, scalar_initial_def> */
1106 if (adjust_in_epilog)
1108 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1109 build2 (code, scalar_type, new_temp, scalar_initial_def));
1110 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1111 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1112 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1114 if (vect_print_dump_info (REPORT_DETAILS))
1115 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1119 /* 2.5 Replace uses of s_out0 with uses of s_out3 */
1121 /* Find the loop-closed-use at the loop exit of the original
1122 scalar result. (The reduction result is expected to have
1123 two immediate uses - one at the latch block, and one at the
1126 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1128 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1130 exit_phi = USE_STMT (use_p);
/* Redirect every use of the old exit phi's result to the freshly
   computed scalar reduction result.  */
1135 orig_name = PHI_RESULT (exit_phi);
1137 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
1138 SET_USE (use_p, new_temp);
1142 /* Function vectorizable_reduction.
1144 Check if STMT performs a reduction operation that can be vectorized.
1145 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1146 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1147 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze STMT as a binary reduction (e.g. sum += a[i]); when VEC_STMT is
   non-NULL, also emit the vectorized partial-results statement at *BSI,
   create the reduction phi, and generate the epilog reduction code.
   NOTE(review): several original source lines (braces, early returns) are
   elided in this extract; comments describe only what is visible.  */
1150 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1156 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1157 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1158 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1159 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1161 enum tree_code code, reduc_code = 0;
1162 enum machine_mode vec_mode;
1164 optab optab, reduc_optab;
1166 tree def0, def1, def_stmt0, def_stmt1;
1167 enum vect_def_type dt0, dt1;
1170 bool is_simple_use0;
1171 bool is_simple_use1;
1173 /* Is vectorizable reduction? */
1175 /* Not supportable if the reduction variable is used in the loop. */
1176 if (STMT_VINFO_RELEVANT_P (stmt_info))
1179 if (!STMT_VINFO_LIVE_P (stmt_info))
1182 /* Make sure it was already recognized as a reduction pattern. */
1183 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1186 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1188 operation = TREE_OPERAND (stmt, 1);
1189 code = TREE_CODE (operation);
1190 op_type = TREE_CODE_LENGTH (code);
/* Only binary reductions (two operands) are handled.  */
1192 if (op_type != binary_op)
1195 op0 = TREE_OPERAND (operation, 0);
1196 op1 = TREE_OPERAND (operation, 1);
1197 scalar_dest = TREE_OPERAND (stmt, 0);
1198 scalar_type = TREE_TYPE (scalar_dest);
1200 /* Check the first operand. It is expected to be defined inside the loop. */
1202 vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1204 vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
/* Contract enforced below: op0 is a loop def, op1 is the reduction phi
   result, and STMT is the reduction statement the phi feeds back from.  */
1206 gcc_assert (is_simple_use0);
1207 gcc_assert (is_simple_use1);
1208 gcc_assert (dt0 == vect_loop_def);
1209 gcc_assert (dt1 == vect_reduction_def);
1210 gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1211 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1213 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1216 /* Supportable by target? */
1218 /* check support for the operation in the loop */
1219 optab = optab_for_tree_code (code, vectype);
1222 if (vect_print_dump_info (REPORT_DETAILS))
1223 fprintf (vect_dump, "no optab.");
1226 vec_mode = TYPE_MODE (vectype);
1227 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1229 if (vect_print_dump_info (REPORT_DETAILS))
1230 fprintf (vect_dump, "op not supported by target.");
/* Without direct target support, fall back to word-mode vectors only when
   the vector size matches a machine word and the factor is worthwhile.  */
1231 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1232 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1233 < vect_min_worthwhile_factor (code)
1235 if (vect_print_dump_info (REPORT_DETAILS))
1236 fprintf (vect_dump, "proceeding using word mode.");
1239 /* Worthwhile without SIMD support? */
1240 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1241 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1242 < vect_min_worthwhile_factor (code)
1244 if (vect_print_dump_info (REPORT_DETAILS))
1245 fprintf (vect_dump, "not worthwhile without SIMD support.");
1249 /* check support for the epilog operation */
1250 if (!reduction_code_for_scalar_code (code, &reduc_code))
1252 reduc_optab = optab_for_tree_code (reduc_code, vectype);
1255 if (vect_print_dump_info (REPORT_DETAILS))
1256 fprintf (vect_dump, "no optab for reduction.");
/* NUM_TREE_CODES marks "no direct reduc insn" — the epilog will then be
   generated as a sequence of shifts/extracts instead.  */
1257 reduc_code = NUM_TREE_CODES;
1259 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1261 if (vect_print_dump_info (REPORT_DETAILS))
1262 fprintf (vect_dump, "reduc op not supported by target.");
1263 reduc_code = NUM_TREE_CODES;
1266 if (!vec_stmt) /* transformation not required. */
1268 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1274 if (vect_print_dump_info (REPORT_DETAILS))
1275 fprintf (vect_dump, "transform reduction.");
1277 /* Create the destination vector */
1278 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1281 /* Create the reduction-phi that defines the reduction-operand. */
1282 new_phi = create_phi_node (vec_dest, loop->header);
1285 /* Prepare the operand that is defined inside the loop body */
1286 loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1287 gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def))));
1290 /* Create the vectorized operation that computes the partial results */
1291 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1292 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1293 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1294 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1295 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1298 /* Finalize the reduction-phi (set it's arguments) and create the
1299 epilog reduction code. */
1300 vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1305 /* Function vectorizable_assignment.
1307 Check if STMT performs an assignment (copy) that can be vectorized.
1308 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1309 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1310 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze STMT as a simple SSA copy; when VEC_STMT is non-NULL, emit the
   vectorized copy at *BSI.  Returns false when STMT is not a vectorizable
   assignment.  NOTE(review): early-return lines are elided in this
   extract.  */
1313 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1319 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1320 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1321 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1324 enum vect_def_type dt;
1326 /* Is vectorizable assignment? */
1327 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1330 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1332 if (TREE_CODE (stmt) != MODIFY_EXPR)
/* The destination must be an SSA name (scalar copy, not a store).  */
1335 scalar_dest = TREE_OPERAND (stmt, 0);
1336 if (TREE_CODE (scalar_dest) != SSA_NAME)
1339 op = TREE_OPERAND (stmt, 1);
1340 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1342 if (vect_print_dump_info (REPORT_DETAILS))
1343 fprintf (vect_dump, "use not simple.");
/* Analysis phase: just record the stmt kind for the transform phase.  */
1347 if (!vec_stmt) /* transformation not required. */
1349 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1354 if (vect_print_dump_info (REPORT_DETAILS))
1355 fprintf (vect_dump, "transform assignment.");
1358 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1361 op = TREE_OPERAND (stmt, 1);
1362 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1364 /* Arguments are ready. create the new vector stmt. */
1365 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1366 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1367 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1368 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1374 /* Function vect_min_worthwhile_factor.
1376 For a loop where we could vectorize the operation indicated by CODE,
1377 return the minimum vectorization factor that makes it worthwhile
1378 to use generic vectors. */
1380 vect_min_worthwhile_factor (enum tree_code code)
1401 /* Function vectorizable_operation.
1403 Check if STMT performs a binary or unary operation that can be vectorized.
1404 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1405 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1406 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze STMT as a unary or binary arithmetic operation; when VEC_STMT is
   non-NULL, emit the vectorized operation at *BSI.  Returns false when the
   operation is unsupported.  NOTE(review): early-return lines are elided
   in this extract.  */
1409 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1414 tree op0, op1 = NULL;
1415 tree vec_oprnd0, vec_oprnd1=NULL;
1416 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1417 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1418 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1420 enum tree_code code;
1421 enum machine_mode vec_mode;
1427 enum vect_def_type dt;
1429 /* Is STMT a vectorizable binary/unary operation? */
1430 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1433 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1435 if (STMT_VINFO_LIVE_P (stmt_info))
1437 /* FORNOW: not yet supported. */
1438 if (vect_print_dump_info (REPORT_DETAILS))
1439 fprintf (vect_dump, "value used after loop.");
1443 if (TREE_CODE (stmt) != MODIFY_EXPR)
1446 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1449 operation = TREE_OPERAND (stmt, 1);
1450 code = TREE_CODE (operation);
1451 optab = optab_for_tree_code (code, vectype);
1453 /* Support only unary or binary operations. */
1454 op_type = TREE_CODE_LENGTH (code);
1455 if (op_type != unary_op && op_type != binary_op)
1457 if (vect_print_dump_info (REPORT_DETAILS))
1458 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
/* All operands must be "simple uses" (loop defs, invariants, constants).  */
1462 for (i = 0; i < op_type; i++)
1464 op = TREE_OPERAND (operation, i);
1465 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1467 if (vect_print_dump_info (REPORT_DETAILS))
1468 fprintf (vect_dump, "use not simple.");
1473 /* Supportable by target? */
1476 if (vect_print_dump_info (REPORT_DETAILS))
1477 fprintf (vect_dump, "no optab.");
1480 vec_mode = TYPE_MODE (vectype);
1481 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1483 if (vect_print_dump_info (REPORT_DETAILS))
1484 fprintf (vect_dump, "op not supported by target.");
/* No direct SIMD insn: word-mode fallback, only when the vector fits a
   machine word and the vectorization factor is worthwhile.  */
1485 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1486 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1487 < vect_min_worthwhile_factor (code)
1489 if (vect_print_dump_info (REPORT_DETAILS))
1490 fprintf (vect_dump, "proceeding using word mode.");
1493 /* Worthwhile without SIMD support? */
1494 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1495 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1496 < vect_min_worthwhile_factor (code)
1498 if (vect_print_dump_info (REPORT_DETAILS))
1499 fprintf (vect_dump, "not worthwhile without SIMD support.");
1503 if (!vec_stmt) /* transformation not required. */
1505 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1511 if (vect_print_dump_info (REPORT_DETAILS))
1512 fprintf (vect_dump, "transform binary/unary operation.");
1515 scalar_dest = TREE_OPERAND (stmt, 0);
1516 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1519 op0 = TREE_OPERAND (operation, 0);
1520 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1522 if (op_type == binary_op)
1524 op1 = TREE_OPERAND (operation, 1);
1525 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1528 /* Arguments are ready. create the new vector stmt. */
1530 if (op_type == binary_op)
1531 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1532 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1534 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1535 build1 (code, vectype, vec_oprnd0));
1536 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1537 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1538 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1544 /* Function vectorizable_store.
1546 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that can be vectorized.
1548 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1549 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1550 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze STMT as a store to an array/pointer data-ref; when VEC_STMT is
   non-NULL, emit the vectorized store at *BSI.  FORNOW only aligned stores
   are handled (see the gcc_assert below).  NOTE(review):
   "alignment_support_cheme" is a long-standing identifier typo for
   "scheme"; kept as-is since it is used consistently across this file.  */
1553 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1559 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1560 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1561 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1562 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1563 enum machine_mode vec_mode;
1565 enum dr_alignment_support alignment_support_cheme;
1568 enum vect_def_type dt;
1570 /* Is vectorizable store? */
1572 if (TREE_CODE (stmt) != MODIFY_EXPR)
/* A store is recognized by a memory reference on the LHS.  */
1575 scalar_dest = TREE_OPERAND (stmt, 0);
1576 if (TREE_CODE (scalar_dest) != ARRAY_REF
1577 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1580 op = TREE_OPERAND (stmt, 1);
1581 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1583 if (vect_print_dump_info (REPORT_DETAILS))
1584 fprintf (vect_dump, "use not simple.");
1588 vec_mode = TYPE_MODE (vectype);
1589 /* FORNOW. In some cases can vectorize even if data-type not supported
1590 (e.g. - array initialization with 0). */
1591 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1594 if (!STMT_VINFO_DATA_REF (stmt_info))
1598 if (!vec_stmt) /* transformation not required. */
1600 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1606 if (vect_print_dump_info (REPORT_DETAILS))
1607 fprintf (vect_dump, "transform store");
1609 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1610 gcc_assert (alignment_support_cheme);
1611 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
1613 /* Handle use - get the vectorized def from the defining stmt. */
1614 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1617 /* FORNOW: make sure the data reference is aligned. */
1618 vect_align_data_ref (stmt);
1619 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1620 data_ref = build_fold_indirect_ref (data_ref);
1622 /* Arguments are ready. create the new vector stmt. */
1623 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1624 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1626 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1627 element's definition to the vector's definition then update the
1628 defining statement. The original is being deleted so the same
1629 SSA_NAMEs can be used. */
1630 copy_virtual_operands (*vec_stmt, stmt);
1632 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1634 SSA_NAME_DEF_STMT (def) = *vec_stmt;
1636 /* If this virtual def has a use outside the loop and a loop peel is
1637 performed then the def may be renamed by the peel. Mark it for
1638 renaming so the later use will also be renamed. */
1639 mark_sym_for_renaming (SSA_NAME_VAR (def));
1646 /* vectorizable_load.
1648 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that can be vectorized.
1650 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1651 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1652 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze STMT as a load from an array/pointer data-ref; when VEC_STMT is
   non-NULL, emit the vectorized load at *BSI.  Two strategies are visible
   below: (a) direct aligned / target-supported misaligned load, and
   (b) software-pipelined realignment (two aligned loads combined with
   REALIGN_LOAD_EXPR and an optional target permutation mask).  */
1655 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1658 tree vec_dest = NULL;
1659 tree data_ref = NULL;
1661 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1662 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1663 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1670 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1671 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1672 edge pe = loop_preheader_edge (loop);
1673 enum dr_alignment_support alignment_support_cheme;
1675 /* Is vectorizable load? */
1676 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1679 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1681 if (STMT_VINFO_LIVE_P (stmt_info))
1683 /* FORNOW: not yet supported. */
1684 if (vect_print_dump_info (REPORT_DETAILS))
1685 fprintf (vect_dump, "value used after loop.");
1689 if (TREE_CODE (stmt) != MODIFY_EXPR)
1692 scalar_dest = TREE_OPERAND (stmt, 0);
1693 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* A load is recognized by a memory reference on the RHS.  */
1696 op = TREE_OPERAND (stmt, 1);
1697 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1700 if (!STMT_VINFO_DATA_REF (stmt_info))
1703 mode = (int) TYPE_MODE (vectype);
1705 /* FORNOW. In some cases can vectorize even if data-type not supported
1706 (e.g. - data copies). */
1707 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1709 if (vect_print_dump_info (REPORT_DETAILS))
1710 fprintf (vect_dump, "Aligned load, but unsupported type.");
1714 if (!vec_stmt) /* transformation not required. */
1716 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1722 if (vect_print_dump_info (REPORT_DETAILS))
1723 fprintf (vect_dump, "transform load.");
1725 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1726 gcc_assert (alignment_support_cheme);
/* Strategy (a): aligned, or misaligned-but-target-supported.  */
1728 if (alignment_support_cheme == dr_aligned
1729 || alignment_support_cheme == dr_unaligned_supported)
1740 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1741 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1742 if (aligned_access_p (dr))
1743 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned case: wrap the access in MISALIGNED_INDIRECT_REF carrying
   the misalignment in bits (zero when unknown, mis == -1).  */
1746 int mis = DR_MISALIGNMENT (dr);
1747 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1748 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1749 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1751 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1752 new_temp = make_ssa_name (vec_dest, new_stmt);
1753 TREE_OPERAND (new_stmt, 0) = new_temp;
1754 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1755 copy_virtual_operands (new_stmt, stmt);
/* Strategy (b): software-pipelined realignment, sketched below.  */
1757 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1761 msq_init = *(floor(p1))
1762 p2 = initial_addr + VS - 1;
1763 magic = have_builtin ? builtin_result : initial_address;
1766 p2' = p2 + indx * vectype_size
1768 vec_dest = realign_load (msq, lsq, magic)
1782 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1783 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1784 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1786 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1787 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1788 new_temp = make_ssa_name (vec_dest, new_stmt);
1789 TREE_OPERAND (new_stmt, 0) = new_temp;
1790 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1791 gcc_assert (!new_bb);
1792 msq_init = TREE_OPERAND (new_stmt, 0);
1793 copy_virtual_operands (new_stmt, stmt);
1794 update_vuses_to_preheader (new_stmt, loop);
1797 /* <2> Create lsq = *(floor(p2')) in the loop */
/* OFFSET = VS - 1 elements, so p2 points one-before-the-next vector.  */
1798 offset = build_int_cst (integer_type_node,
1799 TYPE_VECTOR_SUBPARTS (vectype));
1800 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1801 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1802 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1803 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1804 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1805 new_temp = make_ssa_name (vec_dest, new_stmt);
1806 TREE_OPERAND (new_stmt, 0) = new_temp;
1807 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1808 lsq = TREE_OPERAND (new_stmt, 0);
1809 copy_virtual_operands (new_stmt, stmt);
1813 if (targetm.vectorize.builtin_mask_for_load)
1815 /* Create permutation mask, if required, in loop preheader. */
1817 params = build_tree_list (NULL_TREE, init_addr);
1818 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1819 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1820 new_stmt = build_function_call_expr (builtin_decl, params);
1821 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1822 new_temp = make_ssa_name (vec_dest, new_stmt);
1823 TREE_OPERAND (new_stmt, 0) = new_temp;
1824 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1825 gcc_assert (!new_bb);
1826 magic = TREE_OPERAND (new_stmt, 0);
1828 /* The result of the CALL_EXPR to this builtin is determined from
1829 the value of the parameter and no global variables are touched
1830 which makes the builtin a "const" function. Requiring the
1831 builtin to have the "const" attribute makes it unnecessary
1832 to call mark_call_clobbered_vars_to_rename. */
1833 gcc_assert (TREE_READONLY (builtin_decl));
1837 /* Use current address instead of init_addr for reduced reg pressure.
1839 magic = dataref_ptr;
1843 /* <4> Create msq = phi <msq_init, lsq> in loop */
1844 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1845 msq = make_ssa_name (vec_dest, NULL_TREE);
1846 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1847 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1848 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1849 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1852 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1854 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1855 new_stmt = build2 (MODIFY_EXPR, vectype, new_stmt);
1856 new_temp = make_ssa_name (vec_dest, new_stmt);
1857 TREE_OPERAND (new_stmt, 0) = new_temp;
1858 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1863 *vec_stmt = new_stmt;
1868 /* Function vectorizable_live_operation.
1870 STMT computes a value that is used outside the loop. Check if
1871 it can be supported. */
/* Check whether STMT, whose value is used after the loop, can be handled.
   FORNOW this succeeds only when every operand is loop-invariant or
   constant, so the scalar computation can simply stay in place; no code
   is generated (BSI and VEC_STMT are unused).  */
1874 vectorizable_live_operation (tree stmt,
1875 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1876 tree *vec_stmt ATTRIBUTE_UNUSED)
1879 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1880 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1882 enum tree_code code;
1886 enum vect_def_type dt;
1888 if (!STMT_VINFO_LIVE_P (stmt_info))
1891 if (TREE_CODE (stmt) != MODIFY_EXPR)
1894 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1897 operation = TREE_OPERAND (stmt, 1);
1898 code = TREE_CODE (operation);
1900 op_type = TREE_CODE_LENGTH (code);
1902 /* FORNOW: support only if all uses are invariant. This means
1903 that the scalar operations can remain in place, unvectorized.
1904 The original last scalar value that they compute will be used. */
1906 for (i = 0; i < op_type; i++)
1908 op = TREE_OPERAND (operation, i);
1909 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1911 if (vect_print_dump_info (REPORT_DETAILS))
1912 fprintf (vect_dump, "use not simple.");
/* Any loop-varying operand disqualifies the stmt.  */
1916 if (dt != vect_invariant_def && dt != vect_constant_def)
1920 /* No transformation is required for the cases we currently support. */
1925 /* Function vect_is_simple_cond.
1928 LOOP - the loop that is being vectorized.
1929 COND - Condition that is checked for simple use.
1931 Returns whether a COND can be vectorized. Checks whether
1932 condition operands are supportable using vect_is_simple_use.
/* Return whether COND is a comparison whose two operands are each either
   a vectorizable SSA name (per vect_is_simple_use) or an integer/real
   constant.  */
1935 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1939 enum vect_def_type dt;
1941 if (!COMPARISON_CLASS_P (cond))
1944 lhs = TREE_OPERAND (cond, 0);
1945 rhs = TREE_OPERAND (cond, 1);
1947 if (TREE_CODE (lhs) == SSA_NAME)
1949 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1950 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1953 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1956 if (TREE_CODE (rhs) == SSA_NAME)
1958 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1959 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1962 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1968 /* vectorizable_condition.
1970 Check if STMT is conditional modify expression that can be vectorized.
1971 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1972 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
1975 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze STMT as a COND_EXPR assignment; when VEC_STMT is non-NULL, emit
   a VEC_COND_EXPR at *BSI.  During analysis the result depends on target
   support via expand_vec_cond_expr_p.  */
1978 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1980 tree scalar_dest = NULL_TREE;
1981 tree vec_dest = NULL_TREE;
1982 tree op = NULL_TREE;
1983 tree cond_expr, then_clause, else_clause;
1984 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1985 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1986 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1987 tree vec_compare, vec_cond_expr;
1989 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1990 enum machine_mode vec_mode;
1992 enum vect_def_type dt;
1994 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1997 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1999 if (STMT_VINFO_LIVE_P (stmt_info))
2001 /* FORNOW: not yet supported. */
2002 if (vect_print_dump_info (REPORT_DETAILS))
2003 fprintf (vect_dump, "value used after loop.");
2007 if (TREE_CODE (stmt) != MODIFY_EXPR)
2010 op = TREE_OPERAND (stmt, 1);
2012 if (TREE_CODE (op) != COND_EXPR)
2015 cond_expr = TREE_OPERAND (op, 0);
2016 then_clause = TREE_OPERAND (op, 1);
2017 else_clause = TREE_OPERAND (op, 2);
2019 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
/* Each branch clause must be a simple use or an int/real constant,
   mirroring the operand checks in vect_is_simple_cond.  */
2022 if (TREE_CODE (then_clause) == SSA_NAME)
2024 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2025 if (!vect_is_simple_use (then_clause, loop_vinfo,
2026 &then_def_stmt, &def, &dt))
2029 else if (TREE_CODE (then_clause) != INTEGER_CST
2030 && TREE_CODE (then_clause) != REAL_CST)
2033 if (TREE_CODE (else_clause) == SSA_NAME)
2035 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2036 if (!vect_is_simple_use (else_clause, loop_vinfo,
2037 &else_def_stmt, &def, &dt))
2040 else if (TREE_CODE (else_clause) != INTEGER_CST
2041 && TREE_CODE (else_clause) != REAL_CST)
2045 vec_mode = TYPE_MODE (vectype);
/* Analysis phase: vectorizable iff the target can expand VEC_COND_EXPR
   in this mode.  */
2049 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2050 return expand_vec_cond_expr_p (op, vec_mode);
2056 scalar_dest = TREE_OPERAND (stmt, 0);
2057 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2059 /* Handle cond expr. */
2061 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2063 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2064 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2065 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2067 /* Arguments are ready. create the new vector stmt. */
2068 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2069 vec_cond_lhs, vec_cond_rhs);
2070 vec_cond_expr = build (VEC_COND_EXPR, vectype,
2071 vec_compare, vec_then_clause, vec_else_clause);
2073 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2074 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2075 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2076 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2081 /* Function vect_transform_stmt.
2083 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatch STMT to the vectorizable_* routine recorded during analysis
   (STMT_VINFO_TYPE) and record the produced vector stmt.  Relevant stmts
   and live stmts are dispatched separately; reductions come through the
   live path.  Returns whether STMT was a store (caller removes it).  */
2086 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2088 bool is_store = false;
2089 tree vec_stmt = NULL_TREE;
2090 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2093 if (STMT_VINFO_RELEVANT_P (stmt_info))
2095 switch (STMT_VINFO_TYPE (stmt_info))
2097 case op_vec_info_type:
2098 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2102 case assignment_vec_info_type:
2103 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2107 case load_vec_info_type:
2108 done = vectorizable_load (stmt, bsi, &vec_stmt);
2112 case store_vec_info_type:
2113 done = vectorizable_store (stmt, bsi, &vec_stmt);
2118 case condition_vec_info_type:
2119 done = vectorizable_condition (stmt, bsi, &vec_stmt);
2124 if (vect_print_dump_info (REPORT_DETAILS))
2125 fprintf (vect_dump, "stmt not supported.");
2129 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2132 if (STMT_VINFO_LIVE_P (stmt_info))
2134 switch (STMT_VINFO_TYPE (stmt_info))
2136 case reduc_vec_info_type:
2137 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2142 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
/* A stmt may not be both relevant and live-with-vec-stmt; the vec stmt
   slot must still be empty here.  */
2148 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2149 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2157 /* This function builds ni_name = number of iterations loop executes
2158 on the loop preheader. */
/* Build, on the loop preheader edge, an SSA name "niters" holding the
   number of iterations the loop executes, and return it.  Any statements
   force_gimple_operand produces are inserted on the preheader edge.  */
2161 vect_build_loop_niters (loop_vec_info loop_vinfo)
2163 tree ni_name, stmt, var;
2165 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2166 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2168 var = create_tmp_var (TREE_TYPE (ni), "niters");
2169 add_referenced_tmp_var (var);
2170 ni_name = force_gimple_operand (ni, &stmt, false, var);
2172 pe = loop_preheader_edge (loop);
/* Inserting on the preheader edge must not split it into a new block.  */
2175 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2176 gcc_assert (!new_bb);
2183 /* This function generates the following statements:
2185 ni_name = number of iterations loop executes
2186 ratio = ni_name / vf
2187 ratio_mult_vf_name = ratio * vf
2189 and places them at the loop preheader edge. */
/* Emit on the loop preheader edge:
     ni_name            = number of iterations of the loop
     ratio              = ni_name >> log2(vf)
     ratio_mult_vf_name = ratio << log2(vf)
   and return the three names through the out-parameters.  VF is assumed
   to be a power of two (exact_log2 below).  */
2192 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2194 tree *ratio_mult_vf_name_ptr,
2195 tree *ratio_name_ptr)
2203 tree ratio_mult_vf_name;
2204 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2205 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2206 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2207 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2209 pe = loop_preheader_edge (loop);
2211 /* Generate temporary variable that contains
2212 number of iterations loop executes. */
2214 ni_name = vect_build_loop_niters (loop_vinfo);
2216 /* Create: ratio = ni >> log2(vf) */
2218 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2219 add_referenced_tmp_var (var);
2220 ratio_name = make_ssa_name (var, NULL_TREE);
2221 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2222 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2223 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2225 pe = loop_preheader_edge (loop);
/* Edge insertion must not create a new block here.  */
2226 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2227 gcc_assert (!new_bb);
2229 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2231 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2232 add_referenced_tmp_var (var);
2233 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2234 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2235 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2236 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2238 pe = loop_preheader_edge (loop);
2239 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2240 gcc_assert (!new_bb);
2242 *ni_name_ptr = ni_name;
2243 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2244 *ratio_name_ptr = ratio_name;
2250 /* Function update_vuses_to_preheader.
2253 STMT - a statement with potential VUSEs.
2254 LOOP - the loop whose preheader will contain STMT.
2256 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2257 appears to be defined in a V_MAY_DEF in another statement in a loop.
2258 One such case is when the VUSE is at the dereference of a __restricted__
2259 pointer in a load and the V_MAY_DEF is at the dereference of a different
2260 __restricted__ pointer in a store. Vectorization may result in
2261 copy_virtual_uses being called to copy the problematic VUSE to a new
2262 statement that is being inserted in the loop preheader. This procedure
2263 is called to change the SSA_NAME in the new statement's VUSE from the
2264 SSA_NAME updated in the loop to the related SSA_NAME available on the
2265 path entering the loop.
2267 When this function is called, we have the following situation:
2272 # name1 = phi < name0 , name2>
2277 # name2 = vdef <name1>
2282 Stmt S1 was created in the loop preheader block as part of misaligned-load
2283 handling. This function fixes the name of the vuse of S1 from 'name1' to
/* For each VUSE of STMT (which is being placed in LOOP's preheader),
   replace an SSA name defined inside LOOP with the corresponding name
   available on entry to the loop, found via the header phi for the same
   base variable (the phi argument on the preheader edge).  */
2287 update_vuses_to_preheader (tree stmt, struct loop *loop)
2289 basic_block header_bb = loop->header;
2290 edge preheader_e = loop_preheader_edge (loop);
2292 use_operand_p use_p;
2294 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2296 tree ssa_name = USE_FROM_PTR (use_p);
2297 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2298 tree name_var = SSA_NAME_VAR (ssa_name);
2299 basic_block bb = bb_for_stmt (def_stmt);
2301 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2302 if (!IS_EMPTY_STMT (def_stmt)
2303 && flow_bb_inside_loop_p (loop, bb))
2305 /* If the block containing the statement defining the SSA_NAME
2306 is in the loop then it's necessary to find the definition
2307 outside the loop using the PHI nodes of the header. */
2309 bool updated = false;
2311 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2313 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2315 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
/* A matching header phi must exist for every in-loop virtual def.  */
2320 gcc_assert (updated);
2326 /* Function vect_update_ivs_after_vectorizer.
2328 "Advance" the induction variables of LOOP to the value they should take
2329 after the execution of LOOP. This is currently necessary because the
2330 vectorizer does not handle induction variables that are used after the
2331 loop. Such a situation occurs when the last iterations of LOOP are
2333 1. We introduced new uses after LOOP for IVs that were not originally used
2334 after LOOP: the IVs of LOOP are now used by an epilog loop.
2335 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2336 times, whereas the loop IVs should be bumped N times.
2339 - LOOP - a loop that is going to be vectorized. The last few iterations
2340 of LOOP were peeled.
2341 - NITERS - the number of iterations that LOOP executes (before it is
2342 vectorized). i.e, the number of times the ivs should be bumped.
2343 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2344 coming out from LOOP on which there are uses of the LOOP ivs
2345 (this is the path from LOOP->exit to epilog_loop->preheader).
2347 The new definitions of the ivs are placed in LOOP->exit.
2348 The phi args associated with the edge UPDATE_E in the bb
2349 UPDATE_E->dest are updated accordingly.
2351 Assumption 1: Like the rest of the vectorizer, this function assumes
2352 a single loop exit that has a single predecessor.
2354 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2355 organized in the same order.
2357 Assumption 3: The access function of the ivs is simple enough (see
2358 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2360 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2361 coming out of LOOP on which the ivs of LOOP are used (this is the path
2362 that leads to the epilog loop; other paths skip the epilog loop). This
2363 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2364 needs to have its phis updated.
2368 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2371 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2372 basic_block exit_bb = loop->single_exit->dest;
2374 basic_block update_bb = update_e->dest;
2376 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2378 /* Make sure there exists a single-predecessor exit bb: */
2379 gcc_assert (single_pred_p (exit_bb));
/* Walk the header PHIs of LOOP and the PHIs of update_bb in lockstep;
   per Assumption 2 (see the function comment) they are in the same order.  */
2381 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2383 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2385 tree access_fn = NULL;
2386 tree evolution_part;
2389 tree var, stmt, ni, ni_name;
2390 block_stmt_iterator last_bsi;
2392 if (vect_print_dump_info (REPORT_DETAILS))
2394 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2395 print_generic_expr (vect_dump, phi, TDF_SLIM);
2398 /* Skip virtual phi's. */
2399 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2401 if (vect_print_dump_info (REPORT_DETAILS))
2402 fprintf (vect_dump, "virtual phi. skip.");
2406 /* Skip reduction phis. */
2407 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2409 if (vect_print_dump_info (REPORT_DETAILS))
2410 fprintf (vect_dump, "reduc phi. skip.");
/* Determine the IV's scalar evolution (access function) and extract its
   per-iteration step within this loop.  */
2414 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2415 gcc_assert (access_fn);
2417 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2418 gcc_assert (evolution_part != NULL_TREE);
2420 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2421 of degree >= 2 or exponential. */
2422 gcc_assert (!tree_is_chrec (evolution_part));
2424 step_expr = evolution_part;
2425 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
/* ni = init + niters * step: the value the IV takes after NITERS scalar
   iterations of LOOP.  */
2428 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2429 build2 (MULT_EXPR, TREE_TYPE (niters),
2430 niters, step_expr), init_expr);
2432 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2433 add_referenced_tmp_var (var);
/* Gimplify NI; any statements needed to compute it are returned in STMT.  */
2435 ni_name = force_gimple_operand (ni, &stmt, false, var);
2437 /* Insert stmt into exit_bb. */
2438 last_bsi = bsi_last (exit_bb);
2440 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2442 /* Fix phi expressions in the successor bb. */
2443 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2448 /* Function vect_do_peeling_for_loop_bound
2450 Peel the last iterations of the loop represented by LOOP_VINFO.
2451 The peeled iterations form a new epilog loop. Given that the loop now
2452 iterates NITERS times, the new epilog loop iterates
2453 NITERS % VECTORIZATION_FACTOR times.
2455 The original loop will later be made to iterate
2456 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2459 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2460 struct loops *loops)
2462 tree ni_name, ratio_mult_vf_name;
2463 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2464 struct loop *new_loop;
2466 basic_block preheader;
2469 if (vect_print_dump_info (REPORT_DETAILS))
2470 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2472 initialize_original_copy_tables ();
2474 /* Generate the following variables on the preheader of original loop:
2476 ni_name = number of iterations the original loop executes
2477 ratio = ni_name / vf
2478 ratio_mult_vf_name = ratio * vf */
2479 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2480 &ratio_mult_vf_name, ratio);
/* Peel off the last n%VF iterations into a new (scalar) epilog loop;
   the original LOOP will iterate ratio_mult_vf_name times.  */
2482 loop_num = loop->num;
2483 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2484 ratio_mult_vf_name, ni_name, false);
2485 gcc_assert (new_loop);
/* Peeling must not have renumbered the original loop.  */
2486 gcc_assert (loop_num == loop->num);
2487 #ifdef ENABLE_CHECKING
2488 slpeel_verify_cfg_after_peeling (loop, new_loop);
2491 /* A guard that controls whether the new_loop is to be executed or skipped
2492 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2493 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2494 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2495 is on the path where the LOOP IVs are used and need to be updated. */
2497 preheader = loop_preheader_edge (new_loop)->src;
2498 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2499 update_e = EDGE_PRED (preheader, 0);
2501 update_e = EDGE_PRED (preheader, 1);
2503 /* Update IVs of original loop as if they were advanced
2504 by ratio_mult_vf_name steps. */
2505 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2507 /* After peeling we have to reset scalar evolution analyzer. */
2510 free_original_copy_tables ();
2514 /* Function vect_gen_niters_for_prolog_loop
2516 Set the number of iterations for the loop represented by LOOP_VINFO
2517 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2518 and the misalignment of DR - the data reference recorded in
2519 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2520 this loop, the data reference DR will refer to an aligned location.
2522 The following computation is generated:
2524 If the misalignment of DR is known at compile time:
2525 addr_mis = int mis = DR_MISALIGNMENT (dr);
2526 Else, compute address misalignment in bytes:
2527 addr_mis = addr & (vectype_size - 1)
2529 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2531 (elem_size = element type size; an element is the scalar element
2532 whose type is the inner type of the vectype) */
2535 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2537 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2538 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2539 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2541 tree iters, iters_name;
2544 tree dr_stmt = DR_STMT (dr);
2545 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2546 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* Vector alignment in bytes.  */
2547 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2548 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
2549 tree niters_type = TREE_TYPE (loop_niters);
2551 pe = loop_preheader_edge (loop);
/* Case 1: the misalignment of DR is known at compile time.  */
2553 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2555 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
/* Bytes per scalar element (vector alignment divided by the number of
   elements per vector).  */
2556 int element_size = vectype_align/vf;
2557 int elem_misalign = byte_misalign / element_size;
2559 if (vect_print_dump_info (REPORT_DETAILS))
2560 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
/* Number of scalar iterations needed to reach an aligned address.  */
2561 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
/* Case 2: misalignment unknown at compile time - generate run-time code
   that computes it from DR's start address.  */
2565 tree new_stmts = NULL_TREE;
2567 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2568 tree ptr_type = TREE_TYPE (start_addr);
2569 tree size = TYPE_SIZE (ptr_type);
2570 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2571 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
/* log2 of the element size, used to turn a byte count into an element
   count with a right shift.  */
2572 tree elem_size_log =
2573 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
2574 tree vf_tree = build_int_cst (unsigned_type_node, vf);
/* Emit the address computation on the preheader edge; it must not
   require splitting the edge.  */
2578 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2579 gcc_assert (!new_bb);
2581 /* Create: byte_misalign = addr & (vectype_size - 1) */
2583 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2585 /* Create: elem_misalign = byte_misalign / element_size */
2587 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
2589 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2590 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
2591 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
2592 iters = fold_convert (niters_type, iters);
2595 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2596 /* If the loop bound is known at compile time we already verified that it is
2597 greater than vf; since the misalignment ('iters') is at most vf, there's
2598 no need to generate the MIN_EXPR in this case. */
2599 if (TREE_CODE (loop_niters) != INTEGER_CST)
2600 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2602 if (vect_print_dump_info (REPORT_DETAILS))
2604 fprintf (vect_dump, "niters for prolog loop: ");
2605 print_generic_expr (vect_dump, iters, TDF_SLIM);
2608 var = create_tmp_var (niters_type, "prolog_loop_niters");
2609 add_referenced_tmp_var (var);
/* Gimplify ITERS; any statements needed to compute it go in STMT.  */
2610 iters_name = force_gimple_operand (iters, &stmt, false, var);
2612 /* Insert stmt on loop preheader edge. */
2615 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2616 gcc_assert (!new_bb);
2623 /* Function vect_update_init_of_dr
2625 NITERS iterations were peeled from LOOP. DR represents a data reference
2626 in LOOP. This function updates the information recorded in DR to
2627 account for the fact that the first NITERS iterations had already been
2628 executed. Specifically, it updates the OFFSET field of DR. */
2631 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2633 tree offset = DR_OFFSET (dr);
2635 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2636 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2637 DR_OFFSET (dr) = offset;
2641 /* Function vect_update_inits_of_drs
2643 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2644 This function updates the information recorded for the data references in
2645 the loop to account for the fact that the first NITERS iterations had
2646 already been executed. Specifically, it updates the initial_condition of the
2647 access_function of all the data_references in the loop. */
2650 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2653 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2655 if (vect_dump && (dump_flags & TDF_DETAILS))
2656 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2658 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
2660 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
2661 vect_update_init_of_dr (dr, niters);
2666 /* Function vect_do_peeling_for_alignment
2668 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2669 'niters' is set to the misalignment of one of the data references in the
2670 loop, thereby forcing it to refer to an aligned location at the beginning
2671 of the execution of this loop. The data reference for which we are
2672 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2675 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2677 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2678 tree niters_of_prolog_loop, ni_name;
2680 struct loop *new_loop;
2682 if (vect_print_dump_info (REPORT_DETAILS))
2683 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2685 initialize_original_copy_tables ();
/* ni_name = the original iteration count of LOOP; from it derive how many
   iterations the prolog loop must run to reach an aligned access.  */
2687 ni_name = vect_build_loop_niters (loop_vinfo);
2688 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2690 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2692 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2693 niters_of_prolog_loop, ni_name, true);
2694 gcc_assert (new_loop);
2695 #ifdef ENABLE_CHECKING
2696 slpeel_verify_cfg_after_peeling (new_loop, loop);
2699 /* Update number of times loop executes. */
2700 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
/* The remaining (to-be-vectorized) loop runs the original count minus the
   iterations consumed by the prolog.  */
2701 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2702 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2704 /* Update the init conditions of the access functions of all data refs. */
2705 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2707 /* After peeling we have to reset scalar evolution analyzer. */
2710 free_original_copy_tables ();
2714 /* Function vect_transform_loop.
2716 The analysis phase has determined that the loop is vectorizable.
2717 Vectorize the loop - create vectorized stmts to replace the scalar
2718 stmts in the loop, and update the loop exit condition. */
2721 vect_transform_loop (loop_vec_info loop_vinfo,
2722 struct loops *loops ATTRIBUTE_UNUSED)
2724 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2725 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2726 int nbbs = loop->num_nodes;
2727 block_stmt_iterator si;
2730 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2732 if (vect_print_dump_info (REPORT_DETAILS))
2733 fprintf (vect_dump, "=== vec_transform_loop ===");
2735 /* Peel the loop if there are data refs with unknown alignment.
2736 Only one data ref with unknown alignment is allowed. */
2738 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
2739 vect_do_peeling_for_alignment (loop_vinfo, loops);
2741 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2742 compile time constant), or it is a constant that doesn't divide by the
2743 vectorization factor, then an epilog loop needs to be created.
2744 We therefore duplicate the loop: the original loop will be vectorized,
2745 and will compute the first (n/VF) iterations. The second copy of the loop
2746 will remain scalar and will compute the remaining (n%VF) iterations.
2747 (VF is the vectorization factor). */
2749 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2750 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2751 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
2752 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
2754 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
2755 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
2757 /* 1) Make sure the loop header has exactly two entries
2758 2) Make sure we have a preheader basic block. */
2760 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
2762 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2765 /* FORNOW: the vectorizer supports only loops whose body consists
2766 of one basic block (header + empty latch). When the vectorizer will
2767 support more involved loop forms, the order by which the BBs are
2768 traversed need to be reconsidered. */
2770 for (i = 0; i < nbbs; i++)
2772 basic_block bb = bbs[i];
2774 for (si = bsi_start (bb); !bsi_end_p (si);)
2776 tree stmt = bsi_stmt (si);
2777 stmt_vec_info stmt_info;
2780 if (vect_print_dump_info (REPORT_DETAILS))
2782 fprintf (vect_dump, "------>vectorizing statement: ");
2783 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2785 stmt_info = vinfo_for_stmt (stmt);
2786 gcc_assert (stmt_info);
2787 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2788 && !STMT_VINFO_LIVE_P (stmt_info))
2793 /* FORNOW: Verify that all stmts operate on the same number of
2794 units and no inner unrolling is necessary. */
2796 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
2797 == (unsigned HOST_WIDE_INT) vectorization_factor);
2799 /* -------- vectorize statement ------------ */
2800 if (vect_print_dump_info (REPORT_DETAILS))
2801 fprintf (vect_dump, "transform statement.");
2803 is_store = vect_transform_stmt (stmt, &si);
2806 /* Free the attached stmt_vec_info and remove the stmt. */
2807 stmt_ann_t ann = stmt_ann (stmt);
2809 set_stmt_info ((tree_ann_t)ann, NULL);
2818 slpeel_make_loop_iterate_ntimes (loop, ratio);
2820 /* The memory tags and pointers in vectorized statements need to
2821 have their SSA forms updated. FIXME, why can't this be delayed
2822 until all the loops have been transformed? */
2823 update_ssa (TODO_update_ssa);
2825 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2826 fprintf (vect_dump, "LOOP VECTORIZED.");