1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
38 #include "tree-data-ref.h"
39 #include "tree-chrec.h"
40 #include "tree-scalar-evolution.h"
41 #include "tree-vectorizer.h"
42 #include "langhooks.h"
43 #include "tree-pass.h"
47 /* Utility functions for the code transformation. */
48 static bool vect_transform_stmt (tree, block_stmt_iterator *);
49 static void vect_align_data_ref (tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, bool);
53 static tree vect_create_index_for_vector_ref (loop_vec_info);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
73 static void vect_do_peeling_for_loop_bound
74 (loop_vec_info, tree *, struct loops *);
75 static int vect_min_worthwhile_factor (enum tree_code);
78 /* Function vect_get_new_vect_var.
80 Returns a name for a new variable. The current naming scheme appends the
81 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
82 the name of vectorizer generated variables, and appends that to NAME if
86 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
99 case vect_pointer_var:
107 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
109 new_vect_var = create_tmp_var (type, prefix);
115 /* Function vect_create_index_for_vector_ref.
117 Create (and return) an index variable, along with it's update chain in the
118 loop. This variable will be used to access a memory location in a vector
122 LOOP: The loop being vectorized.
123 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
124 function can be added here, or in the loop pre-header.
127 Return an index that will be used to index a vector array. It is expected
128 that a pointer to the first vector will be used as the base address for the
131 FORNOW: we are not trying to be efficient, just creating a new index each
132 time from scratch. At this time all vector references could use the same
135 TODO: create only one index to be used by all vector references. Record
136 the index in the LOOP_VINFO the first time this procedure is called and
137 return it on subsequent calls. The increment of this index must be placed
138 just before the conditional expression that ends the single block loop. */
141 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
144 block_stmt_iterator incr_bsi;
146 tree indx_before_incr, indx_after_incr;
147 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
150 /* It is assumed that the base pointer used for vectorized access contains
151 the address of the first vector. Therefore the index used for vectorized
152 access must be initialized to zero and incremented by 1. */
154 init = integer_zero_node;
155 step = integer_one_node;
157 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
158 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
159 &indx_before_incr, &indx_after_incr);
160 incr = bsi_stmt (incr_bsi);
161 set_stmt_info ((tree_ann_t)stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
163 return indx_before_incr;
167 /* Function vect_create_addr_base_for_vector_ref.
169 Create an expression that computes the address of the first memory location
170 that will be accessed for a data reference.
173 STMT: The statement containing the data reference.
174 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
175 OFFSET: Optional. If supplied, it is be added to the initial address.
178 1. Return an SSA_NAME whose value is the address of the memory location of
179 the first vector of the data reference.
180 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
181 these statement(s) which define the returned SSA_NAME.
183 FORNOW: We are only handling array accesses with step 1. */
186 vect_create_addr_base_for_vector_ref (tree stmt,
190 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
191 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
193 unshare_expr (STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info));
194 tree base_name = build_fold_indirect_ref (data_ref_base);
195 tree ref = DR_REF (dr);
196 tree scalar_type = TREE_TYPE (ref);
197 tree scalar_ptr_type = build_pointer_type (scalar_type);
200 tree addr_base, addr_expr;
202 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
204 /* Create base_offset */
205 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
206 add_referenced_tmp_var (dest);
207 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
208 append_to_statement_list_force (new_stmt, new_stmt_list);
212 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
213 add_referenced_tmp_var (tmp);
214 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
215 STMT_VINFO_VECT_STEP (stmt_info));
216 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
217 base_offset, offset);
218 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
219 append_to_statement_list_force (new_stmt, new_stmt_list);
222 /* base + base_offset */
223 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
226 /* addr_expr = addr_base */
227 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
228 get_name (base_name));
229 add_referenced_tmp_var (addr_expr);
230 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
231 new_temp = make_ssa_name (addr_expr, vec_stmt);
232 TREE_OPERAND (vec_stmt, 0) = new_temp;
233 append_to_statement_list_force (vec_stmt, new_stmt_list);
235 if (vect_print_dump_info (REPORT_DETAILS))
237 fprintf (vect_dump, "created ");
238 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
244 /* Function vect_align_data_ref.
246 Handle misalignment of a memory accesses.
248 FORNOW: Can't handle misaligned accesses.
249 Make sure that the dataref is aligned. */
252 vect_align_data_ref (tree stmt)
254 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
255 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
257 /* FORNOW: can't handle misaligned accesses;
258 all accesses expected to be aligned. */
259 gcc_assert (aligned_access_p (dr));
263 /* Function vect_create_data_ref_ptr.
265 Create a memory reference expression for vector access, to be used in a
266 vector load/store stmt. The reference is based on a new pointer to vector
270 1. STMT: a stmt that references memory. Expected to be of the form
271 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
272 2. BSI: block_stmt_iterator where new stmts can be added.
273 3. OFFSET (optional): an offset to be added to the initial address accessed
274 by the data-ref in STMT.
275 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
276 pointing to the initial address.
279 1. Declare a new ptr to vector_type, and have it point to the base of the
280 data reference (initial addressed accessed by the data reference).
281 For example, for vector of type V8HI, the following code is generated:
284 vp = (v8hi *)initial_address;
286 if OFFSET is not supplied:
287 initial_address = &a[init];
288 if OFFSET is supplied:
289 initial_address = &a[init + OFFSET];
291 Return the initial_address in INITIAL_ADDRESS.
293 2. Create a data-reference in the loop based on the new vector pointer vp,
294 and using a new index variable 'idx' as follows:
298 where if ONLY_INIT is true:
301 update = idx + vector_type_size
303 Return the pointer vp'.
306 FORNOW: handle only aligned and consecutive accesses. */
309 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
310 tree *initial_address, bool only_init)
313 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
315 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
316 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
322 tree new_stmt_list = NULL_TREE;
324 edge pe = loop_preheader_edge (loop);
330 tree type, tmp, size;
332 base_name = build_fold_indirect_ref (unshare_expr (
333 STMT_VINFO_VECT_DR_BASE_ADDRESS (stmt_info)));
335 if (vect_print_dump_info (REPORT_DETAILS))
337 tree data_ref_base = base_name;
338 fprintf (vect_dump, "create array_ref of type: ");
339 print_generic_expr (vect_dump, vectype, TDF_SLIM);
340 if (TREE_CODE (data_ref_base) == VAR_DECL)
341 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
342 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
343 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
344 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
345 fprintf (vect_dump, " vectorizing a record based array ref: ");
346 else if (TREE_CODE (data_ref_base) == SSA_NAME)
347 fprintf (vect_dump, " vectorizing a pointer ref: ");
348 print_generic_expr (vect_dump, base_name, TDF_SLIM);
351 /** (1) Create the new vector-pointer variable: **/
353 vect_ptr_type = build_pointer_type (vectype);
354 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
355 get_name (base_name));
356 add_referenced_tmp_var (vect_ptr);
359 /** (2) Add aliasing information to the new vector-pointer:
360 (The points-to info (SSA_NAME_PTR_INFO) may be defined later.) **/
362 tag = STMT_VINFO_MEMTAG (stmt_info);
365 /* If tag is a variable (and NOT_A_TAG) than a new type alias
366 tag must be created with tag added to its may alias list. */
367 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
368 new_type_alias (vect_ptr, tag);
370 var_ann (vect_ptr)->type_mem_tag = tag;
372 var_ann (vect_ptr)->subvars = STMT_VINFO_SUBVARS (stmt_info);
374 /** (3) Calculate the initial address the vector-pointer, and set
375 the vector-pointer to point to it before the loop: **/
377 /* Create: (&(base[init_val+offset]) in the loop preheader. */
378 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
380 pe = loop_preheader_edge (loop);
381 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
382 gcc_assert (!new_bb);
383 *initial_address = new_temp;
385 /* Create: p = (vectype *) initial_base */
386 vec_stmt = fold_convert (vect_ptr_type, new_temp);
387 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
388 new_temp = make_ssa_name (vect_ptr, vec_stmt);
389 TREE_OPERAND (vec_stmt, 0) = new_temp;
390 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
391 gcc_assert (!new_bb);
392 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
395 /** (4) Handle the updating of the vector-pointer inside the loop: **/
397 if (only_init) /* No update in loop is required. */
399 /* Copy the points-to information if it exists. */
400 if (STMT_VINFO_PTR_INFO (stmt_info))
401 duplicate_ssa_name_ptr_info (vect_ptr_init,
402 STMT_VINFO_PTR_INFO (stmt_info));
403 return vect_ptr_init;
406 idx = vect_create_index_for_vector_ref (loop_vinfo);
408 /* Create: update = idx * vectype_size */
409 tmp = create_tmp_var (integer_type_node, "update");
410 add_referenced_tmp_var (tmp);
411 size = TYPE_SIZE (vect_ptr_type);
412 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
413 ptr_update = create_tmp_var (type, "update");
414 add_referenced_tmp_var (ptr_update);
415 vectype_size = TYPE_SIZE_UNIT (vectype);
416 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
417 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
418 new_temp = make_ssa_name (tmp, vec_stmt);
419 TREE_OPERAND (vec_stmt, 0) = new_temp;
420 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
421 vec_stmt = fold_convert (type, new_temp);
422 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
423 new_temp = make_ssa_name (ptr_update, vec_stmt);
424 TREE_OPERAND (vec_stmt, 0) = new_temp;
425 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
427 /* Create: data_ref_ptr = vect_ptr_init + update */
428 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
429 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
430 new_temp = make_ssa_name (vect_ptr, vec_stmt);
431 TREE_OPERAND (vec_stmt, 0) = new_temp;
432 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
433 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
435 /* Copy the points-to information if it exists. */
436 if (STMT_VINFO_PTR_INFO (stmt_info))
437 duplicate_ssa_name_ptr_info (data_ref_ptr, STMT_VINFO_PTR_INFO (stmt_info));
442 /* Function vect_create_destination_var.
444 Create a new temporary of type VECTYPE. */
447 vect_create_destination_var (tree scalar_dest, tree vectype)
450 const char *new_name;
452 enum vect_var_kind kind;
454 kind = vectype ? vect_simple_var : vect_scalar_var;
455 type = vectype ? vectype : TREE_TYPE (scalar_dest);
457 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
459 new_name = get_name (scalar_dest);
462 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
463 add_referenced_tmp_var (vec_dest);
469 /* Function vect_init_vector.
471 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
472 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
473 used in the vectorization of STMT. */
476 vect_init_vector (tree stmt, tree vector_var)
478 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
479 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
480 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
483 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
489 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
490 add_referenced_tmp_var (new_var);
492 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
493 new_temp = make_ssa_name (new_var, init_stmt);
494 TREE_OPERAND (init_stmt, 0) = new_temp;
496 pe = loop_preheader_edge (loop);
497 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
498 gcc_assert (!new_bb);
500 if (vect_print_dump_info (REPORT_DETAILS))
502 fprintf (vect_dump, "created new init_stmt: ");
503 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
506 vec_oprnd = TREE_OPERAND (init_stmt, 0);
511 /* Function vect_get_vec_def_for_operand.
513 OP is an operand in STMT. This function returns a (vector) def that will be
514 used in the vectorized stmt for STMT.
516 In the case that OP is an SSA_NAME which is defined in the loop, then
517 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
519 In case OP is an invariant or constant, a new stmt that creates a vector def
520 needs to be introduced. */
523 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
528 stmt_vec_info def_stmt_info = NULL;
529 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
530 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
531 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
532 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
533 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
539 enum vect_def_type dt;
542 if (vect_print_dump_info (REPORT_DETAILS))
544 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
545 print_generic_expr (vect_dump, op, TDF_SLIM);
548 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
549 gcc_assert (is_simple_use);
550 if (vect_print_dump_info (REPORT_DETAILS))
554 fprintf (vect_dump, "def = ");
555 print_generic_expr (vect_dump, def, TDF_SLIM);
559 fprintf (vect_dump, " def_stmt = ");
560 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
566 /* Case 1: operand is a constant. */
567 case vect_constant_def:
572 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
573 if (vect_print_dump_info (REPORT_DETAILS))
574 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
576 for (i = nunits - 1; i >= 0; --i)
578 t = tree_cons (NULL_TREE, op, t);
580 vec_cst = build_vector (vectype, t);
581 return vect_init_vector (stmt, vec_cst);
584 /* Case 2: operand is defined outside the loop - loop invariant. */
585 case vect_invariant_def:
590 /* Create 'vec_inv = {inv,inv,..,inv}' */
591 if (vect_print_dump_info (REPORT_DETAILS))
592 fprintf (vect_dump, "Create vector_inv.");
594 for (i = nunits - 1; i >= 0; --i)
596 t = tree_cons (NULL_TREE, def, t);
599 vec_inv = build_constructor (vectype, t);
600 return vect_init_vector (stmt, vec_inv);
603 /* Case 3: operand is defined inside the loop. */
607 *scalar_def = def_stmt;
609 /* Get the def from the vectorized stmt. */
610 def_stmt_info = vinfo_for_stmt (def_stmt);
611 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
612 gcc_assert (vec_stmt);
613 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
617 /* Case 4: operand is defined by a loop header phi - reduction */
618 case vect_reduction_def:
620 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
622 /* Get the def before the loop */
623 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
624 return get_initial_def_for_reduction (stmt, op, scalar_def);
627 /* Case 5: operand is defined by loop-header phi - induction. */
628 case vect_induction_def:
630 if (vect_print_dump_info (REPORT_DETAILS))
631 fprintf (vect_dump, "induction - unsupported.");
632 internal_error ("no support for induction"); /* FORNOW */
641 /* Function vect_finish_stmt_generation.
643 Insert a new stmt. */
646 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
648 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
650 if (vect_print_dump_info (REPORT_DETAILS))
652 fprintf (vect_dump, "add new stmt: ");
653 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
656 /* Make sure bsi points to the stmt that is being vectorized. */
657 gcc_assert (stmt == bsi_stmt (*bsi));
659 #ifdef USE_MAPPED_LOCATION
660 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
662 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
667 #define ADJUST_IN_EPILOG 1
669 /* Function get_initial_def_for_reduction
672 STMT - a stmt that performs a reduction operation in the loop.
673 INIT_VAL - the initial value of the reduction variable
676 SCALAR_DEF - a tree that holds a value to be added to the final result
677 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
678 Return a vector variable, initialized according to the operation that STMT
679 performs. This vector will be used as the initial value of the
680 vector of partial results.
682 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
685 min/max: [init_val,init_val,..,init_val,init_val]
686 bit and/or: [init_val,init_val,..,init_val,init_val]
687 and when necessary (e.g. add/mult case) let the caller know
688 that it needs to adjust the result by init_val.
690 Option2: Initialize the vector as follows:
691 add: [0,0,...,0,init_val]
692 mult: [1,1,...,1,init_val]
693 min/max: [init_val,init_val,...,init_val]
694 bit and/or: [init_val,init_val,...,init_val]
695 and no adjustments are needed.
697 For example, for the following code:
703 STMT is 's = s + a[i]', and the reduction variable is 's'.
704 For a vector of 4 units, we want to return either [0,0,0,init_val],
705 or [0,0,0,0] and let the caller know that it needs to adjust
706 the result at the end by 'init_val'.
708 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
709 TODO: Use some cost-model to estimate which scheme is more profitable.
713 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
715 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
716 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
717 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
719 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
720 tree type = TREE_TYPE (init_val);
722 tree vec, t = NULL_TREE;
723 bool need_epilog_adjust;
726 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
731 if (INTEGRAL_TYPE_P (type))
732 def = build_int_cst (type, 0);
734 def = build_real (type, dconst0);
736 #ifdef ADJUST_IN_EPILOG
737 /* All the 'nunits' elements are set to 0. The final result will be
738 adjusted by 'init_val' at the loop epilog. */
740 need_epilog_adjust = true;
742 /* 'nunits - 1' elements are set to 0; The last element is set to
743 'init_val'. No further adjustments at the epilog are needed. */
744 nelements = nunits - 1;
745 need_epilog_adjust = false;
753 need_epilog_adjust = true;
760 for (i = nelements - 1; i >= 0; --i)
761 t = tree_cons (NULL_TREE, def, t);
763 if (nelements == nunits - 1)
765 /* Set the last element of the vector. */
766 t = tree_cons (NULL_TREE, init_val, t);
769 gcc_assert (nelements == nunits);
771 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
772 vec = build_vector (vectype, t);
774 vec = build_constructor (vectype, t);
776 if (!need_epilog_adjust)
778 if (INTEGRAL_TYPE_P (type))
779 init_val = build_int_cst (type, 0);
781 init_val = build_real (type, dconst0);
783 *scalar_def = init_val;
785 return vect_init_vector (stmt, vec);
789 /* Function vect_create_epilog_for_reduction:
791 Create code at the loop-epilog to finalize the result of a reduction
794 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
795 into a single result, by applying the operation REDUC_CODE on the
796 partial-results-vector. For this, we need to create a new phi node at the
797 loop exit to preserve loop-closed form, as illustrated below.
799 STMT is the original scalar reduction stmt that is being vectorized.
800 REDUCTION_OP is the scalar reduction-variable.
801 REDUCTION_PHI is the phi-node that carries the reduction computation.
802 This function also sets the arguments for the REDUCTION_PHI:
803 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
804 The loop-latch argument is VECT_DEF - the vector of partial sums.
806 This function transforms this:
809 vec_def = phi <null, null> # REDUCTION_PHI
814 s_out0 = phi <s_loop> # EXIT_PHI
822 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
827 s_out0 = phi <s_loop> # EXIT_PHI
828 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
830 v_out2 = reduc_expr <v_out1>
831 s_out3 = extract_field <v_out2, 0>
838 vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
839 enum tree_code reduc_code, tree reduction_phi)
841 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
842 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
843 enum machine_mode mode = TYPE_MODE (vectype);
844 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
845 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
847 tree scalar_dest = TREE_OPERAND (stmt, 0);
848 tree scalar_type = TREE_TYPE (scalar_dest);
850 block_stmt_iterator exit_bsi;
855 tree new_scalar_dest, exit_phi;
856 tree bitsize, bitpos, bytesize;
857 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
858 tree scalar_initial_def;
859 tree vec_initial_def;
861 imm_use_iterator imm_iter;
863 bool extract_scalar_result;
864 bool adjust_in_epilog;
866 /*** 1. Create the reduction def-use cycle ***/
868 /* 1.1 set the loop-entry arg of the reduction-phi: */
869 /* For the case of reduction, vect_get_vec_def_for_operand returns
870 the scalar def before the loop, that defines the initial value
871 of the reduction variable. */
872 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
873 &scalar_initial_def);
874 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
877 /* 1.2 set the loop-latch arg for the reduction-phi: */
878 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
880 if (vect_print_dump_info (REPORT_DETAILS))
882 fprintf (vect_dump, "transform reduction: created def-use cycle:");
883 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
884 fprintf (vect_dump, "\n");
885 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
889 /*** 2. Create epilog code ***/
891 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
892 v_out1 = phi <v_loop> */
894 exit_bb = loop->single_exit->dest;
895 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
896 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
898 exit_bsi = bsi_start (exit_bb);
901 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
902 bitsize = TYPE_SIZE (scalar_type);
903 bytesize = TYPE_SIZE_UNIT (scalar_type);
905 /* 2.2 Create the reduction code. */
907 if (reduc_code < NUM_TREE_CODES)
910 v_out2 = reduc_expr <v_out1> */
912 if (vect_print_dump_info (REPORT_DETAILS))
913 fprintf (vect_dump, "Reduce using direct vector reduction.");
915 vec_dest = vect_create_destination_var (scalar_dest, vectype);
916 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
917 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
918 new_temp = make_ssa_name (vec_dest, epilog_stmt);
919 TREE_OPERAND (epilog_stmt, 0) = new_temp;
920 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
922 extract_scalar_result = true;
923 adjust_in_epilog = true;
927 enum tree_code shift_code = 0;
928 bool have_whole_vector_shift = true;
929 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
931 int element_bitsize = tree_low_cst (bitsize, 1);
932 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
935 /* The result of the reduction is expected to be at the least
936 significant bits of the vector. This is merely convention,
937 as it's the extraction later that really matters, and that
938 is also under our control. */
939 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
940 shift_code = VEC_RSHIFT_EXPR;
942 have_whole_vector_shift = false;
944 /* Regardless of whether we have a whole vector shift, if we're
945 emulating the operation via tree-vect-generic, we don't want
946 to use it. Only the first round of the reduction is likely
947 to still be profitable via emulation. */
948 /* ??? It might be better to emit a reduction tree code here, so that
949 tree-vect-generic can expand the first round via bit tricks. */
950 if (!VECTOR_MODE_P (mode))
951 have_whole_vector_shift = false;
954 optab optab = optab_for_tree_code (code, vectype);
955 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
956 have_whole_vector_shift = false;
959 if (have_whole_vector_shift)
962 for (offset = VS/2; offset >= element_size; offset/=2)
964 Create: va' = vec_shift <va, offset>
965 Create: va = vop <va, va'>
968 if (vect_print_dump_info (REPORT_DETAILS))
969 fprintf (vect_dump, "Reduce using vector shifts");
971 vec_dest = vect_create_destination_var (scalar_dest, vectype);
972 new_temp = PHI_RESULT (new_phi);
974 for (bit_offset = vec_size_in_bits/2;
975 bit_offset >= element_bitsize;
978 tree bitpos = size_int (bit_offset);
980 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
981 build2 (shift_code, vectype, new_temp, bitpos));
982 new_name = make_ssa_name (vec_dest, epilog_stmt);
983 TREE_OPERAND (epilog_stmt, 0) = new_name;
984 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
985 if (vect_print_dump_info (REPORT_DETAILS))
986 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
989 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
990 build2 (code, vectype, new_name, new_temp));
991 new_temp = make_ssa_name (vec_dest, epilog_stmt);
992 TREE_OPERAND (epilog_stmt, 0) = new_temp;
993 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
994 if (vect_print_dump_info (REPORT_DETAILS))
995 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
998 extract_scalar_result = true;
999 adjust_in_epilog = true;
1005 for (offset=0; offset<vector_size; offset+=element_size;)
1007 Create: s' = extract_field <v_out2, offset>
1008 Create: s = op <s, s'>
1011 if (vect_print_dump_info (REPORT_DETAILS))
1012 fprintf (vect_dump, "Reduce using scalar code. ");
1014 vec_temp = PHI_RESULT (new_phi);
1015 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1017 /* first iteration is peeled out when possible to minimize
1018 the number of operations we generate: */
1019 if (code == PLUS_EXPR
1020 && (integer_zerop (scalar_initial_def)
1021 || real_zerop (scalar_initial_def)))
1023 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1024 build3 (BIT_FIELD_REF, scalar_type,
1025 vec_temp, bitsize, bitsize_zero_node));
1026 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1027 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1028 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1029 if (vect_print_dump_info (REPORT_DETAILS))
1030 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1032 bit_offset = element_bitsize;
1036 new_temp = scalar_initial_def;
1041 bit_offset < vec_size_in_bits;
1042 bit_offset += element_bitsize)
1044 tree bitpos = bitsize_int (bit_offset);
1046 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1047 build3 (BIT_FIELD_REF, scalar_type,
1048 vec_temp, bitsize, bitpos));
1049 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1050 TREE_OPERAND (epilog_stmt, 0) = new_name;
1051 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1052 if (vect_print_dump_info (REPORT_DETAILS))
1053 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1056 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1057 build2 (code, scalar_type, new_name, new_temp));
1058 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1059 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1060 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1061 if (vect_print_dump_info (REPORT_DETAILS))
1062 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1065 extract_scalar_result = false;
1066 adjust_in_epilog = false;
1071 /* 2.3 Extract the final scalar result. Create:
1072 s_out3 = extract_field <v_out2, bitpos> */
1074 if (extract_scalar_result)
1076 if (vect_print_dump_info (REPORT_DETAILS))
1077 fprintf (vect_dump, "extract scalar result");
1079 /* The result is in the low order bits. */
1080 if (BITS_BIG_ENDIAN)
1081 bitpos = size_binop (MULT_EXPR,
1082 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1083 TYPE_SIZE (scalar_type));
1085 bitpos = bitsize_zero_node;
1087 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1088 build3 (BIT_FIELD_REF, scalar_type,
1089 new_temp, bitsize, bitpos));
1090 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1091 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1092 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1093 if (vect_print_dump_info (REPORT_DETAILS))
1094 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1098 /* 2.4 Adjust the final result by the initial value of the reduction
1099 variable. (when such adjustment is not needed, then
1100 'scalar_initial_def' is zero).
1103 s_out = scalar_expr <s_out, scalar_initial_def> */
1105 if (adjust_in_epilog)
1107 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1108 build2 (code, scalar_type, new_temp, scalar_initial_def));
1109 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1110 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1111 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1113 if (vect_print_dump_info (REPORT_DETAILS))
1114 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1118 /* 2.5 Replace uses of s_out0 with uses of s_out3 */
1120 /* Find the loop-closed-use at the loop exit of the original
1121 scalar result. (The reduction result is expected to have
1122 two immediate uses - one at the latch block, and one at the
1125 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1127 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1129 exit_phi = USE_STMT (use_p);
1134 orig_name = PHI_RESULT (exit_phi);
1136 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
1137 SET_USE (use_p, new_temp);
1141 /* Function vectorizable_reduction.
1143 Check if STMT performs a reduction operation that can be vectorized.
1144 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1145 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1146 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1149 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1155 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1156 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1157 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1158 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1160 enum tree_code code, reduc_code = 0;
1161 enum machine_mode vec_mode;
1163 optab optab, reduc_optab;
1165 tree def0, def1, def_stmt0, def_stmt1;
1166 enum vect_def_type dt0, dt1;
1169 bool is_simple_use0;
1170 bool is_simple_use1;
1172 /* Is vectorizable reduction? */
1174 /* Not supportable if the reduction variable is used in the loop. */
1175 if (STMT_VINFO_RELEVANT_P (stmt_info))
1178 if (!STMT_VINFO_LIVE_P (stmt_info))
1181 /* Make sure it was already recognized as a reduction pattern. */
1182 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1185 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1187 operation = TREE_OPERAND (stmt, 1);
1188 code = TREE_CODE (operation);
1189 op_type = TREE_CODE_LENGTH (code);
1191 if (op_type != binary_op)
1194 op0 = TREE_OPERAND (operation, 0);
1195 op1 = TREE_OPERAND (operation, 1);
1196 scalar_dest = TREE_OPERAND (stmt, 0);
1197 scalar_type = TREE_TYPE (scalar_dest);
1199 /* Check the first operand. It is expected to be defined inside the loop. */
1201 vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
1203 vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1205 gcc_assert (is_simple_use0);
1206 gcc_assert (is_simple_use1);
1207 gcc_assert (dt0 == vect_loop_def);
1208 gcc_assert (dt1 == vect_reduction_def);
1209 gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1210 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1212 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1215 /* Supportable by target? */
1217 /* check support for the operation in the loop */
1218 optab = optab_for_tree_code (code, vectype);
1221 if (vect_print_dump_info (REPORT_DETAILS))
1222 fprintf (vect_dump, "no optab.");
1225 vec_mode = TYPE_MODE (vectype);
1226 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1228 if (vect_print_dump_info (REPORT_DETAILS))
1229 fprintf (vect_dump, "op not supported by target.");
1230 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1231 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1232 < vect_min_worthwhile_factor (code))
1234 if (vect_print_dump_info (REPORT_DETAILS))
1235 fprintf (vect_dump, "proceeding using word mode.");
1238 /* Worthwhile without SIMD support? */
1239 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1240 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1241 < vect_min_worthwhile_factor (code))
1243 if (vect_print_dump_info (REPORT_DETAILS))
1244 fprintf (vect_dump, "not worthwhile without SIMD support.");
1248 /* check support for the epilog operation */
1249 if (!reduction_code_for_scalar_code (code, &reduc_code))
1251 reduc_optab = optab_for_tree_code (reduc_code, vectype);
1254 if (vect_print_dump_info (REPORT_DETAILS))
1255 fprintf (vect_dump, "no optab for reduction.");
1256 reduc_code = NUM_TREE_CODES;
1258 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1260 if (vect_print_dump_info (REPORT_DETAILS))
1261 fprintf (vect_dump, "reduc op not supported by target.");
1262 reduc_code = NUM_TREE_CODES;
1265 if (!vec_stmt) /* transformation not required. */
1267 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1273 if (vect_print_dump_info (REPORT_DETAILS))
1274 fprintf (vect_dump, "transform reduction.");
1276 /* Create the destination vector */
1277 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1280 /* Create the reduction-phi that defines the reduction-operand. */
1281 new_phi = create_phi_node (vec_dest, loop->header);
1284 /* Prepare the operand that is defined inside the loop body */
1285 loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1286 gcc_assert (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (loop_vec_def))));
1289 /* Create the vectorized operation that computes the partial results */
1290 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1291 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1292 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1293 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1294 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1297 /* Finalize the reduction-phi (set it's arguments) and create the
1298 epilog reduction code. */
1299 vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1304 /* Function vectorizable_assignment.
1306 Check if STMT performs an assignment (copy) that can be vectorized.
1307 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1308 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1309 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1312 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1318 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1319 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1320 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1323 enum vect_def_type dt;
1325 /* Is vectorizable assignment? */
1326 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1329 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1331 if (TREE_CODE (stmt) != MODIFY_EXPR)
1334 scalar_dest = TREE_OPERAND (stmt, 0);
1335 if (TREE_CODE (scalar_dest) != SSA_NAME)
1338 op = TREE_OPERAND (stmt, 1);
1339 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1341 if (vect_print_dump_info (REPORT_DETAILS))
1342 fprintf (vect_dump, "use not simple.");
1346 if (!vec_stmt) /* transformation not required. */
1348 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1353 if (vect_print_dump_info (REPORT_DETAILS))
1354 fprintf (vect_dump, "transform assignment.");
1357 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1360 op = TREE_OPERAND (stmt, 1);
1361 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1363 /* Arguments are ready. create the new vector stmt. */
1364 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1365 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1366 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1367 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1373 /* Function vect_min_worthwhile_factor.
1375 For a loop where we could vectorize the operation indicated by CODE,
1376 return the minimum vectorization factor that makes it worthwhile
1377 to use generic vectors. */
1379 vect_min_worthwhile_factor (enum tree_code code)
1400 /* Function vectorizable_operation.
1402 Check if STMT performs a binary or unary operation that can be vectorized.
1403 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1404 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1405 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1408 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1413 tree op0, op1 = NULL;
1414 tree vec_oprnd0, vec_oprnd1=NULL;
1415 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1416 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1417 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1419 enum tree_code code;
1420 enum machine_mode vec_mode;
1426 enum vect_def_type dt;
1428 /* Is STMT a vectorizable binary/unary operation? */
1429 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1432 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1434 if (STMT_VINFO_LIVE_P (stmt_info))
1436 /* FORNOW: not yet supported. */
1437 if (vect_print_dump_info (REPORT_DETAILS))
1438 fprintf (vect_dump, "value used after loop.");
1442 if (TREE_CODE (stmt) != MODIFY_EXPR)
1445 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1448 operation = TREE_OPERAND (stmt, 1);
1449 code = TREE_CODE (operation);
1450 optab = optab_for_tree_code (code, vectype);
1452 /* Support only unary or binary operations. */
1453 op_type = TREE_CODE_LENGTH (code);
1454 if (op_type != unary_op && op_type != binary_op)
1456 if (vect_print_dump_info (REPORT_DETAILS))
1457 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1461 for (i = 0; i < op_type; i++)
1463 op = TREE_OPERAND (operation, i);
1464 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1466 if (vect_print_dump_info (REPORT_DETAILS))
1467 fprintf (vect_dump, "use not simple.");
1472 /* Supportable by target? */
1475 if (vect_print_dump_info (REPORT_DETAILS))
1476 fprintf (vect_dump, "no optab.");
1479 vec_mode = TYPE_MODE (vectype);
1480 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1482 if (vect_print_dump_info (REPORT_DETAILS))
1483 fprintf (vect_dump, "op not supported by target.");
1484 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1485 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1486 < vect_min_worthwhile_factor (code))
1488 if (vect_print_dump_info (REPORT_DETAILS))
1489 fprintf (vect_dump, "proceeding using word mode.");
1492 /* Worthwhile without SIMD support? */
1493 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1494 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1495 < vect_min_worthwhile_factor (code))
1497 if (vect_print_dump_info (REPORT_DETAILS))
1498 fprintf (vect_dump, "not worthwhile without SIMD support.");
1502 if (!vec_stmt) /* transformation not required. */
1504 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1510 if (vect_print_dump_info (REPORT_DETAILS))
1511 fprintf (vect_dump, "transform binary/unary operation.");
1514 scalar_dest = TREE_OPERAND (stmt, 0);
1515 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1518 op0 = TREE_OPERAND (operation, 0);
1519 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1521 if (op_type == binary_op)
1523 op1 = TREE_OPERAND (operation, 1);
1524 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1527 /* Arguments are ready. create the new vector stmt. */
1529 if (op_type == binary_op)
1530 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1531 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1533 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1534 build1 (code, vectype, vec_oprnd0));
1535 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1536 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1537 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1543 /* Function vectorizable_store.
1545 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
1547 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1548 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1549 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1552 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1558 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1559 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1560 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1561 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1562 enum machine_mode vec_mode;
1564 enum dr_alignment_support alignment_support_cheme;
1567 enum vect_def_type dt;
1569 /* Is vectorizable store? */
1571 if (TREE_CODE (stmt) != MODIFY_EXPR)
1574 scalar_dest = TREE_OPERAND (stmt, 0);
1575 if (TREE_CODE (scalar_dest) != ARRAY_REF
1576 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1579 op = TREE_OPERAND (stmt, 1);
1580 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1582 if (vect_print_dump_info (REPORT_DETAILS))
1583 fprintf (vect_dump, "use not simple.");
1587 vec_mode = TYPE_MODE (vectype);
1588 /* FORNOW. In some cases can vectorize even if data-type not supported
1589 (e.g. - array initialization with 0). */
1590 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1593 if (!STMT_VINFO_DATA_REF (stmt_info))
1597 if (!vec_stmt) /* transformation not required. */
1599 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
1605 if (vect_print_dump_info (REPORT_DETAILS))
1606 fprintf (vect_dump, "transform store");
1608 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1609 gcc_assert (alignment_support_cheme);
1610 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
1612 /* Handle use - get the vectorized def from the defining stmt. */
1613 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1616 /* FORNOW: make sure the data reference is aligned. */
1617 vect_align_data_ref (stmt);
1618 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1619 data_ref = build_fold_indirect_ref (data_ref);
1621 /* Arguments are ready. create the new vector stmt. */
1622 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1623 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1625 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1626 element's definition to the vector's definition then update the
1627 defining statement. The original is being deleted so the same
1628 SSA_NAMEs can be used. */
1629 copy_virtual_operands (*vec_stmt, stmt);
1631 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1633 SSA_NAME_DEF_STMT (def) = *vec_stmt;
1635 /* If this virtual def has a use outside the loop and a loop peel is
1636 performed then the def may be renamed by the peel. Mark it for
1637 renaming so the later use will also be renamed. */
1638 mark_sym_for_renaming (SSA_NAME_VAR (def));
1645 /* vectorizable_load.
1647 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
1649 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1650 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1651 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1654 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1657 tree vec_dest = NULL;
1658 tree data_ref = NULL;
1660 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1661 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1662 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1669 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1670 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1671 edge pe = loop_preheader_edge (loop);
1672 enum dr_alignment_support alignment_support_cheme;
1674 /* Is vectorizable load? */
1675 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1678 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1680 if (STMT_VINFO_LIVE_P (stmt_info))
1682 /* FORNOW: not yet supported. */
1683 if (vect_print_dump_info (REPORT_DETAILS))
1684 fprintf (vect_dump, "value used after loop.");
1688 if (TREE_CODE (stmt) != MODIFY_EXPR)
1691 scalar_dest = TREE_OPERAND (stmt, 0);
1692 if (TREE_CODE (scalar_dest) != SSA_NAME)
1695 op = TREE_OPERAND (stmt, 1);
1696 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1699 if (!STMT_VINFO_DATA_REF (stmt_info))
1702 mode = (int) TYPE_MODE (vectype);
1704 /* FORNOW. In some cases can vectorize even if data-type not supported
1705 (e.g. - data copies). */
1706 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1708 if (vect_print_dump_info (REPORT_DETAILS))
1709 fprintf (vect_dump, "Aligned load, but unsupported type.");
1713 if (!vec_stmt) /* transformation not required. */
1715 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
1721 if (vect_print_dump_info (REPORT_DETAILS))
1722 fprintf (vect_dump, "transform load.");
1724 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1725 gcc_assert (alignment_support_cheme);
1727 if (alignment_support_cheme == dr_aligned
1728 || alignment_support_cheme == dr_unaligned_supported)
1739 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1740 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1741 if (aligned_access_p (dr))
1742 data_ref = build_fold_indirect_ref (data_ref);
1745 int mis = DR_MISALIGNMENT (dr);
1746 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1747 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1748 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1750 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1751 new_temp = make_ssa_name (vec_dest, new_stmt);
1752 TREE_OPERAND (new_stmt, 0) = new_temp;
1753 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1754 copy_virtual_operands (new_stmt, stmt);
1756 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1760 msq_init = *(floor(p1))
1761 p2 = initial_addr + VS - 1;
1762 magic = have_builtin ? builtin_result : initial_address;
1765 p2' = p2 + indx * vectype_size
1767 vec_dest = realign_load (msq, lsq, magic)
1781 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1782 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1783 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1785 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1786 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1787 new_temp = make_ssa_name (vec_dest, new_stmt);
1788 TREE_OPERAND (new_stmt, 0) = new_temp;
1789 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1790 gcc_assert (!new_bb);
1791 msq_init = TREE_OPERAND (new_stmt, 0);
1792 copy_virtual_operands (new_stmt, stmt);
1793 update_vuses_to_preheader (new_stmt, loop);
1796 /* <2> Create lsq = *(floor(p2')) in the loop */
1797 offset = build_int_cst (integer_type_node,
1798 TYPE_VECTOR_SUBPARTS (vectype));
1799 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1800 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1801 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1802 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1803 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1804 new_temp = make_ssa_name (vec_dest, new_stmt);
1805 TREE_OPERAND (new_stmt, 0) = new_temp;
1806 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1807 lsq = TREE_OPERAND (new_stmt, 0);
1808 copy_virtual_operands (new_stmt, stmt);
1812 if (targetm.vectorize.builtin_mask_for_load)
1814 /* Create permutation mask, if required, in loop preheader. */
1816 params = build_tree_list (NULL_TREE, init_addr);
1817 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1818 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1819 new_stmt = build_function_call_expr (builtin_decl, params);
1820 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1821 new_temp = make_ssa_name (vec_dest, new_stmt);
1822 TREE_OPERAND (new_stmt, 0) = new_temp;
1823 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1824 gcc_assert (!new_bb);
1825 magic = TREE_OPERAND (new_stmt, 0);
1827 /* The result of the CALL_EXPR to this builtin is determined from
1828 the value of the parameter and no global variables are touched
1829 which makes the builtin a "const" function. Requiring the
1830 builtin to have the "const" attribute makes it unnecessary
1831 to call mark_call_clobbered_vars_to_rename. */
1832 gcc_assert (TREE_READONLY (builtin_decl));
1836 /* Use current address instead of init_addr for reduced reg pressure.
1838 magic = dataref_ptr;
1842 /* <4> Create msq = phi <msq_init, lsq> in loop */
1843 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1844 msq = make_ssa_name (vec_dest, NULL_TREE);
1845 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1846 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1847 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1848 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1851 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1852 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1853 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1854 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1855 new_temp = make_ssa_name (vec_dest, new_stmt);
1856 TREE_OPERAND (new_stmt, 0) = new_temp;
1857 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1862 *vec_stmt = new_stmt;
1867 /* Function vectorizable_live_operation.
1869 STMT computes a value that is used outside the loop. Check if
1870 it can be supported. */
1873 vectorizable_live_operation (tree stmt,
1874 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1875 tree *vec_stmt ATTRIBUTE_UNUSED)
1878 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1879 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1881 enum tree_code code;
1885 enum vect_def_type dt;
1887 if (!STMT_VINFO_LIVE_P (stmt_info))
1890 if (TREE_CODE (stmt) != MODIFY_EXPR)
1893 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1896 operation = TREE_OPERAND (stmt, 1);
1897 code = TREE_CODE (operation);
1899 op_type = TREE_CODE_LENGTH (code);
1901 /* FORNOW: support only if all uses are invariant. This means
1902 that the scalar operations can remain in place, unvectorized.
1903 The original last scalar value that they compute will be used. */
1905 for (i = 0; i < op_type; i++)
1907 op = TREE_OPERAND (operation, i);
1908 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1910 if (vect_print_dump_info (REPORT_DETAILS))
1911 fprintf (vect_dump, "use not simple.");
1915 if (dt != vect_invariant_def && dt != vect_constant_def)
1919 /* No transformation is required for the cases we currently support. */
1924 /* Function vect_is_simple_cond.
1927 LOOP - the loop that is being vectorized.
1928 COND - Condition that is checked for simple use.
1930 Returns whether a COND can be vectorized. Checks whether
1931 condition operands are supportable using vec_is_simple_use. */
1934 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1938 enum vect_def_type dt;
1940 if (!COMPARISON_CLASS_P (cond))
1943 lhs = TREE_OPERAND (cond, 0);
1944 rhs = TREE_OPERAND (cond, 1);
1946 if (TREE_CODE (lhs) == SSA_NAME)
1948 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1949 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1952 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
1955 if (TREE_CODE (rhs) == SSA_NAME)
1957 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1958 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1961 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1967 /* vectorizable_condition.
1969 Check if STMT is conditional modify expression that can be vectorized.
1970 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1971 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
1974 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1977 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1979 tree scalar_dest = NULL_TREE;
1980 tree vec_dest = NULL_TREE;
1981 tree op = NULL_TREE;
1982 tree cond_expr, then_clause, else_clause;
1983 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1984 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1985 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
1986 tree vec_compare, vec_cond_expr;
1988 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1989 enum machine_mode vec_mode;
1991 enum vect_def_type dt;
1993 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1996 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1998 if (STMT_VINFO_LIVE_P (stmt_info))
2000 /* FORNOW: not yet supported. */
2001 if (vect_print_dump_info (REPORT_DETAILS))
2002 fprintf (vect_dump, "value used after loop.");
2006 if (TREE_CODE (stmt) != MODIFY_EXPR)
2009 op = TREE_OPERAND (stmt, 1);
2011 if (TREE_CODE (op) != COND_EXPR)
2014 cond_expr = TREE_OPERAND (op, 0);
2015 then_clause = TREE_OPERAND (op, 1);
2016 else_clause = TREE_OPERAND (op, 2);
2018 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
2021 if (TREE_CODE (then_clause) == SSA_NAME)
2023 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2024 if (!vect_is_simple_use (then_clause, loop_vinfo,
2025 &then_def_stmt, &def, &dt))
2028 else if (TREE_CODE (then_clause) != INTEGER_CST
2029 && TREE_CODE (then_clause) != REAL_CST)
2032 if (TREE_CODE (else_clause) == SSA_NAME)
2034 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2035 if (!vect_is_simple_use (else_clause, loop_vinfo,
2036 &else_def_stmt, &def, &dt))
2039 else if (TREE_CODE (else_clause) != INTEGER_CST
2040 && TREE_CODE (else_clause) != REAL_CST)
2044 vec_mode = TYPE_MODE (vectype);
2048 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2049 return expand_vec_cond_expr_p (op, vec_mode);
2055 scalar_dest = TREE_OPERAND (stmt, 0);
2056 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2058 /* Handle cond expr. */
2060 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2062 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2063 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2064 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2066 /* Arguments are ready. create the new vector stmt. */
2067 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2068 vec_cond_lhs, vec_cond_rhs);
2069 vec_cond_expr = build (VEC_COND_EXPR, vectype,
2070 vec_compare, vec_then_clause, vec_else_clause);
2072 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2073 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2074 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2075 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2080 /* Function vect_transform_stmt.
2082 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2085 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2087 bool is_store = false;
2088 tree vec_stmt = NULL_TREE;
2089 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2092 if (STMT_VINFO_RELEVANT_P (stmt_info))
2094 switch (STMT_VINFO_TYPE (stmt_info))
2096 case op_vec_info_type:
2097 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2101 case assignment_vec_info_type:
2102 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2106 case load_vec_info_type:
2107 done = vectorizable_load (stmt, bsi, &vec_stmt);
2111 case store_vec_info_type:
2112 done = vectorizable_store (stmt, bsi, &vec_stmt);
2117 case condition_vec_info_type:
2118 done = vectorizable_condition (stmt, bsi, &vec_stmt);
2123 if (vect_print_dump_info (REPORT_DETAILS))
2124 fprintf (vect_dump, "stmt not supported.");
2128 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2131 if (STMT_VINFO_LIVE_P (stmt_info))
2133 switch (STMT_VINFO_TYPE (stmt_info))
2135 case reduc_vec_info_type:
2136 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2141 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2147 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2148 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2156 /* This function builds ni_name = number of iterations loop executes
2157 on the loop preheader. */
2160 vect_build_loop_niters (loop_vec_info loop_vinfo)
2162 tree ni_name, stmt, var;
2164 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2165 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2167 var = create_tmp_var (TREE_TYPE (ni), "niters");
2168 add_referenced_tmp_var (var);
2169 ni_name = force_gimple_operand (ni, &stmt, false, var);
2171 pe = loop_preheader_edge (loop);
2174 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2175 gcc_assert (!new_bb);
2182 /* This function generates the following statements:
2184 ni_name = number of iterations loop executes
2185 ratio = ni_name / vf
2186 ratio_mult_vf_name = ratio * vf
2188 and places them at the loop preheader edge. */
2191 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2193 tree *ratio_mult_vf_name_ptr,
2194 tree *ratio_name_ptr)
2202 tree ratio_mult_vf_name;
2203 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2204 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2205 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2206 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2208 pe = loop_preheader_edge (loop);
2210 /* Generate temporary variable that contains
2211 number of iterations loop executes. */
2213 ni_name = vect_build_loop_niters (loop_vinfo);
2215 /* Create: ratio = ni >> log2(vf) */
2217 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2218 add_referenced_tmp_var (var);
2219 ratio_name = make_ssa_name (var, NULL_TREE);
2220 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2221 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2222 SSA_NAME_DEF_STMT (ratio_name) = stmt;
2224 pe = loop_preheader_edge (loop);
2225 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2226 gcc_assert (!new_bb);
2228 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2230 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2231 add_referenced_tmp_var (var);
2232 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2233 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2234 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2235 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2237 pe = loop_preheader_edge (loop);
2238 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2239 gcc_assert (!new_bb);
2241 *ni_name_ptr = ni_name;
2242 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2243 *ratio_name_ptr = ratio_name;
2249 /* Function update_vuses_to_preheader.
2252 STMT - a statement with potential VUSEs.
2253 LOOP - the loop whose preheader will contain STMT.
2255 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2256 appears to be defined in a V_MAY_DEF in another statement in a loop.
2257 One such case is when the VUSE is at the dereference of a __restricted__
2258 pointer in a load and the V_MAY_DEF is at the dereference of a different
2259 __restricted__ pointer in a store. Vectorization may result in
2260 copy_virtual_uses being called to copy the problematic VUSE to a new
2261 statement that is being inserted in the loop preheader. This procedure
2262 is called to change the SSA_NAME in the new statement's VUSE from the
2263 SSA_NAME updated in the loop to the related SSA_NAME available on the
2264 path entering the loop.
2266 When this function is called, we have the following situation:
2271 # name1 = phi < name0 , name2>
2276 # name2 = vdef <name1>
2281 Stmt S1 was created in the loop preheader block as part of misaligned-load
2282 handling. This function fixes the name of the vuse of S1 from 'name1' to
2286 update_vuses_to_preheader (tree stmt, struct loop *loop)
2288 basic_block header_bb = loop->header;
2289 edge preheader_e = loop_preheader_edge (loop);
2291 use_operand_p use_p;
2293 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2295 tree ssa_name = USE_FROM_PTR (use_p);
2296 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2297 tree name_var = SSA_NAME_VAR (ssa_name);
2298 basic_block bb = bb_for_stmt (def_stmt);
2300 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2301 if (!IS_EMPTY_STMT (def_stmt)
2302 && flow_bb_inside_loop_p (loop, bb))
2304 /* If the block containing the statement defining the SSA_NAME
2305 is in the loop then it's necessary to find the definition
2306 outside the loop using the PHI nodes of the header. */
2308 bool updated = false;
2310 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2312 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
2314 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
2319 gcc_assert (updated);
2325 /* Function vect_update_ivs_after_vectorizer.
2327 "Advance" the induction variables of LOOP to the value they should take
2328 after the execution of LOOP. This is currently necessary because the
2329 vectorizer does not handle induction variables that are used after the
2330 loop. Such a situation occurs when the last iterations of LOOP are
2332 1. We introduced new uses after LOOP for IVs that were not originally used
2333 after LOOP: the IVs of LOOP are now used by an epilog loop.
2334 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2335 times, whereas the loop IVs should be bumped N times.
2338 - LOOP - a loop that is going to be vectorized. The last few iterations
2339 of LOOP were peeled.
2340 - NITERS - the number of iterations that LOOP executes (before it is
2341 vectorized). i.e, the number of times the ivs should be bumped.
2342 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2343 coming out from LOOP on which there are uses of the LOOP ivs
2344 (this is the path from LOOP->exit to epilog_loop->preheader).
2346 The new definitions of the ivs are placed in LOOP->exit.
2347 The phi args associated with the edge UPDATE_E in the bb
2348 UPDATE_E->dest are updated accordingly.
2350 Assumption 1: Like the rest of the vectorizer, this function assumes
2351 a single loop exit that has a single predecessor.
2353 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2354 organized in the same order.
2356 Assumption 3: The access function of the ivs is simple enough (see
2357 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2359 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2360 coming out of LOOP on which the ivs of LOOP are used (this is the path
2361 that leads to the epilog loop; other paths skip the epilog loop). This
2362 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2363 needs to have its phis updated.
2367 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2370 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2371 basic_block exit_bb = loop->single_exit->dest;
2373 basic_block update_bb = update_e->dest;
2375 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2377 /* Make sure there exists a single-predecessor exit bb: */
2378 gcc_assert (single_pred_p (exit_bb));
2380 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2382 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2384 tree access_fn = NULL;
2385 tree evolution_part;
2388 tree var, stmt, ni, ni_name;
2389 block_stmt_iterator last_bsi;
2391 if (vect_print_dump_info (REPORT_DETAILS))
2393 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2394 print_generic_expr (vect_dump, phi, TDF_SLIM);
2397 /* Skip virtual phi's. */
2398 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2400 if (vect_print_dump_info (REPORT_DETAILS))
2401 fprintf (vect_dump, "virtual phi. skip.");
2405 /* Skip reduction phis. */
2406 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2408 if (vect_print_dump_info (REPORT_DETAILS))
2409 fprintf (vect_dump, "reduc phi. skip.");
2413 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2414 gcc_assert (access_fn);
2416 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2417 gcc_assert (evolution_part != NULL_TREE);
2419 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2420 of degree >= 2 or exponential. */
2421 gcc_assert (!tree_is_chrec (evolution_part));
2423 step_expr = evolution_part;
2424 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
2427 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2428 build2 (MULT_EXPR, TREE_TYPE (niters),
2429 niters, step_expr), init_expr);
2431 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2432 add_referenced_tmp_var (var);
2434 ni_name = force_gimple_operand (ni, &stmt, false, var);
2436 /* Insert stmt into exit_bb. */
2437 last_bsi = bsi_last (exit_bb);
2439 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2441 /* Fix phi expressions in the successor bb. */
2442 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2447 /* Function vect_do_peeling_for_loop_bound
2449 Peel the last iterations of the loop represented by LOOP_VINFO.
2450 The peeled iterations form a new epilog loop. Given that the loop now
2451 iterates NITERS times, the new epilog loop iterates
2452 NITERS % VECTORIZATION_FACTOR times.
2454 The original loop will later be made to iterate
2455 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2458 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2459 struct loops *loops)
2461 tree ni_name, ratio_mult_vf_name;
2462 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2463 struct loop *new_loop;
2465 basic_block preheader;
2468 if (vect_print_dump_info (REPORT_DETAILS))
2469 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2471 initialize_original_copy_tables ();
2473 /* Generate the following variables on the preheader of original loop:
2475 ni_name = number of iteration the original loop executes
2476 ratio = ni_name / vf
2477 ratio_mult_vf_name = ratio * vf */
2478 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2479 &ratio_mult_vf_name, ratio);
2481 loop_num = loop->num;
2482 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2483 ratio_mult_vf_name, ni_name, false);
2484 gcc_assert (new_loop);
2485 gcc_assert (loop_num == loop->num);
2486 #ifdef ENABLE_CHECKING
2487 slpeel_verify_cfg_after_peeling (loop, new_loop);
2490 /* A guard that controls whether the new_loop is to be executed or skipped
2491 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2492 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2493 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2494 is on the path where the LOOP IVs are used and need to be updated. */
2496 preheader = loop_preheader_edge (new_loop)->src;
2497 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2498 update_e = EDGE_PRED (preheader, 0);
2500 update_e = EDGE_PRED (preheader, 1);
2502 /* Update IVs of original loop as if they were advanced
2503 by ratio_mult_vf_name steps. */
2504 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2506 /* After peeling we have to reset scalar evolution analyzer. */
2509 free_original_copy_tables ();
2513 /* Function vect_gen_niters_for_prolog_loop
2515 Set the number of iterations for the loop represented by LOOP_VINFO
2516 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2517 and the misalignment of DR - the data reference recorded in
2518 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2519 this loop, the data reference DR will refer to an aligned location.
2521 The following computation is generated:
2523 If the misalignment of DR is known at compile time:
2524 addr_mis = int mis = DR_MISALIGNMENT (dr);
2525 Else, compute address misalignment in bytes:
2526 addr_mis = addr & (vectype_size - 1)
2528 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2530 (elem_size = element type size; an element is the scalar element
2531 whose type is the inner type of the vectype) */
2534 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2536 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2537 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2538 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2540 tree iters, iters_name;
2543 tree dr_stmt = DR_STMT (dr);
2544 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2545 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2546 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2547 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
2548 tree niters_type = TREE_TYPE (loop_niters);
2550 pe = loop_preheader_edge (loop);
2552 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2554 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2555 int element_size = vectype_align/vf;
2556 int elem_misalign = byte_misalign / element_size;
2558 if (vect_print_dump_info (REPORT_DETAILS))
2559 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2560 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
2564 tree new_stmts = NULL_TREE;
2566 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2567 tree ptr_type = TREE_TYPE (start_addr);
2568 tree size = TYPE_SIZE (ptr_type);
2569 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2570 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2571 tree elem_size_log =
2572 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
2573 tree vf_tree = build_int_cst (unsigned_type_node, vf);
2577 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2578 gcc_assert (!new_bb);
2580 /* Create: byte_misalign = addr & (vectype_size - 1) */
2582 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2584 /* Create: elem_misalign = byte_misalign / element_size */
2586 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
2588 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2589 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
2590 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
2591 iters = fold_convert (niters_type, iters);
2594 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2595 /* If the loop bound is known at compile time we already verified that it is
2596 greater than vf; since the misalignment ('iters') is at most vf, there's
2597 no need to generate the MIN_EXPR in this case. */
2598 if (TREE_CODE (loop_niters) != INTEGER_CST)
2599 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2601 if (vect_print_dump_info (REPORT_DETAILS))
2603 fprintf (vect_dump, "niters for prolog loop: ");
2604 print_generic_expr (vect_dump, iters, TDF_SLIM);
2607 var = create_tmp_var (niters_type, "prolog_loop_niters");
2608 add_referenced_tmp_var (var);
2609 iters_name = force_gimple_operand (iters, &stmt, false, var);
2611 /* Insert stmt on loop preheader edge. */
2614 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2615 gcc_assert (!new_bb);
2622 /* Function vect_update_init_of_dr
2624 NITERS iterations were peeled from LOOP. DR represents a data reference
2625 in LOOP. This function updates the information recorded in DR to
2626 account for the fact that the first NITERS iterations had already been
2627 executed. Specifically, it updates the OFFSET field of stmt_info. */
2630 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2632 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
2633 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
2635 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters,
2636 STMT_VINFO_VECT_STEP (stmt_info));
2637 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2638 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
2642 /* Function vect_update_inits_of_drs
2644 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2645 This function updates the information recorded for the data references in
2646 the loop to account for the fact that the first NITERS iterations had
2647 already been executed. Specifically, it updates the initial_condition of the
2648 access_function of all the data_references in the loop. */
2651 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2654 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2655 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2657 if (vect_dump && (dump_flags & TDF_DETAILS))
2658 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
2660 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2662 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2663 vect_update_init_of_dr (dr, niters);
2666 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2668 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2669 vect_update_init_of_dr (dr, niters);
2674 /* Function vect_do_peeling_for_alignment
2676 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2677 'niters' is set to the misalignment of one of the data references in the
2678 loop, thereby forcing it to refer to an aligned location at the beginning
2679 of the execution of this loop. The data reference for which we are
2680 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2683 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2685 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2686 tree niters_of_prolog_loop, ni_name;
2688 struct loop *new_loop;
2690 if (vect_print_dump_info (REPORT_DETAILS))
2691 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2693 initialize_original_copy_tables ();
2695 ni_name = vect_build_loop_niters (loop_vinfo);
2696 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2698 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2700 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2701 niters_of_prolog_loop, ni_name, true);
2702 gcc_assert (new_loop);
2703 #ifdef ENABLE_CHECKING
2704 slpeel_verify_cfg_after_peeling (new_loop, loop);
2707 /* Update number of times loop executes. */
2708 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
2709 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2710 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2712 /* Update the init conditions of the access functions of all data refs. */
2713 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2715 /* After peeling we have to reset scalar evolution analyzer. */
2718 free_original_copy_tables ();
2722 /* Function vect_transform_loop.
2724 The analysis phase has determined that the loop is vectorizable.
2725 Vectorize the loop - created vectorized stmts to replace the scalar
2726 stmts in the loop, and update the loop exit condition. */
2729 vect_transform_loop (loop_vec_info loop_vinfo,
2730 struct loops *loops ATTRIBUTE_UNUSED)
2732 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2733 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2734 int nbbs = loop->num_nodes;
2735 block_stmt_iterator si;
2738 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2740 if (vect_print_dump_info (REPORT_DETAILS))
2741 fprintf (vect_dump, "=== vec_transform_loop ===");
2743 /* Peel the loop if there are data refs with unknown alignment.
2744 Only one data ref with unknown store is allowed. */
2746 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
2747 vect_do_peeling_for_alignment (loop_vinfo, loops);
2749 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2750 compile time constant), or it is a constant that doesn't divide by the
2751 vectorization factor, then an epilog loop needs to be created.
2752 We therefore duplicate the loop: the original loop will be vectorized,
2753 and will compute the first (n/VF) iterations. The second copy of the loop
2754 will remain scalar and will compute the remaining (n%VF) iterations.
2755 (VF is the vectorization factor). */
2757 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2758 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2759 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
2760 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
2762 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
2763 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
2765 /* 1) Make sure the loop header has exactly two entries
2766 2) Make sure we have a preheader basic block. */
2768 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
2770 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2773 /* FORNOW: the vectorizer supports only loops which body consist
2774 of one basic block (header + empty latch). When the vectorizer will
2775 support more involved loop forms, the order by which the BBs are
2776 traversed need to be reconsidered. */
2778 for (i = 0; i < nbbs; i++)
2780 basic_block bb = bbs[i];
2782 for (si = bsi_start (bb); !bsi_end_p (si);)
2784 tree stmt = bsi_stmt (si);
2785 stmt_vec_info stmt_info;
2788 if (vect_print_dump_info (REPORT_DETAILS))
2790 fprintf (vect_dump, "------>vectorizing statement: ");
2791 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2793 stmt_info = vinfo_for_stmt (stmt);
2794 gcc_assert (stmt_info);
2795 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2796 && !STMT_VINFO_LIVE_P (stmt_info))
2801 /* FORNOW: Verify that all stmts operate on the same number of
2802 units and no inner unrolling is necessary. */
2804 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
2805 == (unsigned HOST_WIDE_INT) vectorization_factor);
2807 /* -------- vectorize statement ------------ */
2808 if (vect_print_dump_info (REPORT_DETAILS))
2809 fprintf (vect_dump, "transform statement.");
2811 is_store = vect_transform_stmt (stmt, &si);
2814 /* Free the attached stmt_vec_info and remove the stmt. */
2815 stmt_ann_t ann = stmt_ann (stmt);
2817 set_stmt_info ((tree_ann_t)ann, NULL);
2826 slpeel_make_loop_iterate_ntimes (loop, ratio);
2828 /* The memory tags and pointers in vectorized statements need to
2829 have their SSA forms updated. FIXME, why can't this be delayed
2830 until all the loops have been transformed? */
2831 update_ssa (TODO_update_ssa);
2833 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
2834 fprintf (vect_dump, "LOOP VECTORIZED.");