1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005,2006 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
/* NOTE(review): this file is a numbered partial listing -- the embedded
   original line numbers skip values (e.g. 63 -> 65 below), so omitted
   lines are missing throughout.  Do not treat any body here as complete
   or compilable as-is.  */
/* Forward declarations of the file-local (static) helpers for the
   vectorizer's code-transformation phase.  */
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, block_stmt_iterator *, tree, tree *, tree *, bool);
53 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
54 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void update_vuses_to_preheader (tree, struct loop*);
62 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
63 static tree get_initial_def_for_reduction (tree, tree, tree *);
/* Helpers used by the loop-peeling transformations (prolog/epilog loops
   for alignment and for the loop-bound remainder).  */
65 /* Utility function dealing with loop peeling (not peeling itself). */
66 static void vect_generate_tmps_on_preheader
67 (loop_vec_info, tree *, tree *, tree *);
68 static tree vect_build_loop_niters (loop_vec_info);
69 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
70 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
71 static void vect_update_init_of_dr (struct data_reference *, tree niters);
72 static void vect_update_inits_of_drs (loop_vec_info, tree);
73 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
74 static void vect_do_peeling_for_loop_bound
75 (loop_vec_info, tree *, struct loops *);
76 static int vect_min_worthwhile_factor (enum tree_code);
/* NOTE(review): fragment -- original lines 84-86, 88-99, 101-107, 109 and
   111-115 are missing (per the embedded numbering), including the switch
   head, the "vect_" prefix case, and the return statement.  */
79 /* Function vect_get_new_vect_var.
81 Returns a name for a new variable. The current naming scheme appends the
82 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
83 the name of vectorizer generated variables, and appends that to NAME if
87 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
100 case vect_pointer_var:
/* When a caller-supplied NAME is available, combine it with the prefix;
   otherwise fall back to the bare prefix (see the two branches below).  */
108 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
110 new_vect_var = create_tmp_var (type, prefix);
/* NOTE(review): fragment -- interior lines (e.g. 146-147, 149, 152,
   159-161, 165, 170-171, 174-175, 184, 186, 189-193) are missing,
   including local declarations (dest, new_stmt, offset, new_temp,
   vec_stmt) and the function's return.  Documented from what is
   visible only.  */
116 /* Function vect_create_addr_base_for_vector_ref.
118 Create an expression that computes the address of the first memory location
119 that will be accessed for a data reference.
122 STMT: The statement containing the data reference.
123 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
124 OFFSET: Optional. If supplied, it is be added to the initial address.
127 1. Return an SSA_NAME whose value is the address of the memory location of
128 the first vector of the data reference.
129 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
130 these statement(s) which define the returned SSA_NAME.
132 FORNOW: We are only handling array accesses with step 1. */
135 vect_create_addr_base_for_vector_ref (tree stmt,
139 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
140 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
141 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
142 tree base_name = build_fold_indirect_ref (data_ref_base);
143 tree ref = DR_REF (dr);
144 tree scalar_type = TREE_TYPE (ref);
145 tree scalar_ptr_type = build_pointer_type (scalar_type);
148 tree addr_base, addr_expr;
150 tree base_offset = unshare_expr (DR_OFFSET (dr));
151 tree init = unshare_expr (DR_INIT (dr));
/* base_offset = DR_OFFSET + DR_INIT, gimplified into NEW_STMT_LIST.  */
153 /* Create base_offset */
154 base_offset = size_binop (PLUS_EXPR, base_offset, init);
155 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
156 add_referenced_var (dest);
157 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
158 append_to_statement_list_force (new_stmt, new_stmt_list);
/* If the caller supplied an OFFSET, scale it (the multiplier operand is
   on a missing line -- presumably the data-ref step; TODO confirm) and
   fold it into base_offset.  */
162 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
163 add_referenced_var (tmp);
164 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
166 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
167 base_offset, offset);
168 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
169 append_to_statement_list_force (new_stmt, new_stmt_list);
172 /* base + base_offset */
173 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
/* Materialize the computed address in a fresh SSA name whose defining
   statement is appended to NEW_STMT_LIST for the caller to insert.  */
176 /* addr_expr = addr_base */
177 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
178 get_name (base_name));
179 add_referenced_var (addr_expr);
180 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
181 new_temp = make_ssa_name (addr_expr, vec_stmt);
182 TREE_OPERAND (vec_stmt, 0) = new_temp;
183 append_to_statement_list_force (vec_stmt, new_stmt_list);
185 if (vect_print_dump_info (REPORT_DETAILS))
187 fprintf (vect_dump, "created ");
188 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
/* NOTE(review): fragment -- interior lines (e.g. 245-249, 252-253, 255,
   257, 272-273, 280-281, 284, 286-287, 290, 292, 294, 296, 299, 302,
   315-316, 318, 320, 325-327, 329, 331-332, 341, 344, 347, 350-352) are
   missing, including most local declarations and several braces.
   Documented from what is visible only.  */
194 /* Function vect_create_data_ref_ptr.
196 Create a new pointer to vector type (vp), that points to the first location
197 accessed in the loop by STMT, along with the def-use update chain to
198 appropriately advance the pointer through the loop iterations. Also set
199 aliasing information for the pointer. This vector pointer is used by the
200 callers to this function to create a memory reference expression for vector
204 1. STMT: a stmt that references memory. Expected to be of the form
205 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
206 2. BSI: block_stmt_iterator where new stmts can be added.
207 3. OFFSET (optional): an offset to be added to the initial address accessed
208 by the data-ref in STMT.
209 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
210 pointing to the initial address.
213 1. Declare a new ptr to vector_type, and have it point to the base of the
214 data reference (initial addressed accessed by the data reference).
215 For example, for vector of type V8HI, the following code is generated:
218 vp = (v8hi *)initial_address;
220 if OFFSET is not supplied:
221 initial_address = &a[init];
222 if OFFSET is supplied:
223 initial_address = &a[init + OFFSET];
225 Return the initial_address in INITIAL_ADDRESS.
227 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
228 update the pointer in each iteration of the loop.
230 Return the increment stmt that updates the pointer in PTR_INCR.
232 3. Return the pointer. */
235 vect_create_data_ref_ptr (tree stmt,
236 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
237 tree offset, tree *initial_address, tree *ptr_incr,
241 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
242 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
243 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
244 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
250 tree new_stmt_list = NULL_TREE;
251 edge pe = loop_preheader_edge (loop);
254 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
256 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
/* Dump-only diagnostics: classify the data-ref base by tree code.  */
258 if (vect_print_dump_info (REPORT_DETAILS))
260 tree data_ref_base = base_name;
261 fprintf (vect_dump, "create vector-pointer variable to type: ");
262 print_generic_expr (vect_dump, vectype, TDF_SLIM);
263 if (TREE_CODE (data_ref_base) == VAR_DECL)
264 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
265 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
266 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
267 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
268 fprintf (vect_dump, " vectorizing a record based array ref: ");
269 else if (TREE_CODE (data_ref_base) == SSA_NAME)
270 fprintf (vect_dump, " vectorizing a pointer ref: ");
271 print_generic_expr (vect_dump, base_name, TDF_SLIM);
274 /** (1) Create the new vector-pointer variable: **/
276 vect_ptr_type = build_pointer_type (vectype);
277 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
278 get_name (base_name));
279 add_referenced_var (vect_ptr);
282 /** (2) Add aliasing information to the new vector-pointer:
283 (The points-to info (DR_PTR_INFO) may be defined later.) **/
285 tag = DR_MEMTAG (dr);
/* The conditional guarding the choice between new_type_alias and a plain
   symbol_mem_tag assignment is on missing lines -- TODO confirm against
   the original source.  */
288 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
289 tag must be created with tag added to its may alias list. */
291 new_type_alias (vect_ptr, tag, DR_REF (dr));
293 var_ann (vect_ptr)->symbol_mem_tag = tag;
295 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
297 /** (3) Calculate the initial address the vector-pointer, and set
298 the vector-pointer to point to it before the loop: **/
300 /* Create: (&(base[init_val+offset]) in the loop preheader. */
301 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
303 pe = loop_preheader_edge (loop);
/* Inserting on the preheader edge must not split it -- assert no new
   basic block was created.  */
304 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
305 gcc_assert (!new_bb);
306 *initial_address = new_temp;
308 /* Create: p = (vectype *) initial_base */
309 vec_stmt = fold_convert (vect_ptr_type, new_temp);
310 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
311 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
312 TREE_OPERAND (vec_stmt, 0) = vect_ptr_init;
313 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
314 gcc_assert (!new_bb);
317 /** (4) Handle the updating of the vector-pointer inside the loop: **/
319 if (only_init) /* No update in loop is required. */
321 /* Copy the points-to information if it exists. */
322 if (DR_PTR_INFO (dr))
323 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
324 return vect_ptr_init;
/* Otherwise build an IV that advances the pointer by the vector size
   each iteration; the IV-increment stmt is recorded (the assignment to
   *PTR_INCR is on a missing line -- TODO confirm).  */
328 block_stmt_iterator incr_bsi;
330 tree indx_before_incr, indx_after_incr;
333 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
334 create_iv (vect_ptr_init,
335 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
336 NULL_TREE, loop, &incr_bsi, insert_after,
337 &indx_before_incr, &indx_after_incr);
338 incr = bsi_stmt (incr_bsi);
339 set_stmt_info (stmt_ann (incr),
340 new_stmt_vec_info (incr, loop_vinfo));
342 /* Copy the points-to information if it exists. */
343 if (DR_PTR_INFO (dr))
345 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
346 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
348 merge_alias_info (vect_ptr_init, indx_before_incr);
349 merge_alias_info (vect_ptr_init, indx_after_incr);
353 return indx_before_incr;
/* NOTE(review): fragment -- lines 385-388, 390-391, 398-400, 402, 408,
   411, 413, 416, 418-419 and 424 are missing, including the return type,
   the STMT parameter line, and local declarations (incr_stmt, use_p,
   iter).  Documented from what is visible only.  */
358 /* Function bump_vector_ptr
360 Increment a pointer (to a vector type) by vector-size. Connect the new
361 increment stmt to the exising def-use update-chain of the pointer.
363 The pointer def-use update-chain before this function:
364 DATAREF_PTR = phi (p_0, p_2)
366 PTR_INCR: p_2 = DATAREF_PTR + step
368 The pointer def-use update-chain after this function:
369 DATAREF_PTR = phi (p_0, p_2)
371 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
373 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
376 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
378 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
379 The increment amount across iterations is also expected to be
381 BSI - location where the new update stmt is to be placed.
382 STMT - the original scalar memory-access stmt that is being vectorized.
384 Output: Return NEW_DATAREF_PTR as illustrated above.
389 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
392 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
393 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
394 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
395 tree vptr_type = TREE_TYPE (dataref_ptr);
396 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
397 tree update = fold_convert (vptr_type, TYPE_SIZE_UNIT (vectype));
401 tree new_dataref_ptr;
/* Emit: NEW_DATAREF_PTR = DATAREF_PTR + vector-size, at BSI.  */
403 incr_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_var,
404 build2 (PLUS_EXPR, vptr_type, dataref_ptr, update));
405 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
406 TREE_OPERAND (incr_stmt, 0) = new_dataref_ptr;
407 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
/* Rewire PTR_INCR so its DATAREF_PTR use now reads NEW_DATAREF_PTR; any
   other use is expected to be the constant step (asserted below).  */
409 /* Update the vector-pointer's cross-iteration increment. */
410 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
412 tree use = USE_FROM_PTR (use_p);
414 if (use == dataref_ptr)
415 SET_USE (use_p, new_dataref_ptr);
417 gcc_assert (tree_int_cst_compare (use, update) == 0);
420 /* Copy the points-to information if it exists. */
421 if (DR_PTR_INFO (dr))
422 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
423 merge_alias_info (new_dataref_ptr, dataref_ptr);
425 return new_dataref_ptr;
/* NOTE(review): fragment -- lines 430, 432-433, 435-436, 438, 440, 443,
   445, 447-448 and 451-455 are missing, including the return type, the
   vec_dest/type declarations, the NULL-name fallback, and the return.  */
429 /* Function vect_create_destination_var.
431 Create a new temporary of type VECTYPE. */
434 vect_create_destination_var (tree scalar_dest, tree vectype)
437 const char *new_name;
439 enum vect_var_kind kind;
/* KIND distinguishes a vector temp from a scalar temp when VECTYPE is
   NULL...  */
441 kind = vectype ? vect_simple_var : vect_scalar_var;
442 type = vectype ? vectype : TREE_TYPE (scalar_dest);
444 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
446 new_name = get_name (scalar_dest);
/* NOTE(review): ...yet the visible call below passes vect_simple_var
   rather than KIND, which leaves KIND unused.  Either a transcription
   artifact of this listing or a latent bug -- verify against the
   original source before changing.  */
449 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
450 add_referenced_var (vec_dest);
/* NOTE(review): fragment -- lines 461-462, 464, 468-469, 471-475, 478,
   482, 486, 488, 491-492 and 494-497 are missing, including the return
   type, several local declarations, and the final return of
   VEC_OPRND.  */
456 /* Function vect_init_vector.
458 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
459 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
460 used in the vectorization of STMT. */
463 vect_init_vector (tree stmt, tree vector_var)
465 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
467 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
470 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
/* Build INIT_STMT: a fresh "vect_cst_" temp assigned VECTOR_VAR...  */
476 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
477 add_referenced_var (new_var);
479 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
480 new_temp = make_ssa_name (new_var, init_stmt);
481 TREE_OPERAND (init_stmt, 0) = new_temp;
/* ...inserted on the loop-preheader edge, which must not be split.  */
483 pe = loop_preheader_edge (loop);
484 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
485 gcc_assert (!new_bb);
487 if (vect_print_dump_info (REPORT_DETAILS))
489 fprintf (vect_dump, "created new init_stmt: ");
490 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
493 vec_oprnd = TREE_OPERAND (init_stmt, 0);
/* NOTE(review): fragment -- the switch head on DT, several case labels
   (e.g. Case 3's vect_loop_def label), local declarations, braces and
   the function's closing lines are on missing lines.  The visible
   "case" labels below belong to that missing switch.  */
498 /* Function vect_get_vec_def_for_operand.
500 OP is an operand in STMT. This function returns a (vector) def that will be
501 used in the vectorized stmt for STMT.
503 In the case that OP is an SSA_NAME which is defined in the loop, then
504 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
506 In case OP is an invariant or constant, a new stmt that creates a vector def
507 needs to be introduced. */
510 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
515 stmt_vec_info def_stmt_info = NULL;
516 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
517 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
518 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
519 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
520 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
526 enum vect_def_type dt;
529 if (vect_print_dump_info (REPORT_DETAILS))
531 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
532 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP; DT drives the case analysis below.  The assert means the
   caller must have verified the operand already.  */
535 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
536 gcc_assert (is_simple_use);
537 if (vect_print_dump_info (REPORT_DETAILS))
541 fprintf (vect_dump, "def = ");
542 print_generic_expr (vect_dump, def, TDF_SLIM);
546 fprintf (vect_dump, " def_stmt = ");
547 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
553 /* Case 1: operand is a constant. */
554 case vect_constant_def:
/* Splat the constant OP into all NUNITS lanes.  */
559 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
560 if (vect_print_dump_info (REPORT_DETAILS))
561 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
563 for (i = nunits - 1; i >= 0; --i)
565 t = tree_cons (NULL_TREE, op, t);
567 vec_cst = build_vector (vectype, t);
568 return vect_init_vector (stmt, vec_cst);
571 /* Case 2: operand is defined outside the loop - loop invariant. */
572 case vect_invariant_def:
/* Splat the invariant DEF; a CONSTRUCTOR (not a VECTOR_CST) is needed
   because the value is not a compile-time constant.  */
577 /* Create 'vec_inv = {inv,inv,..,inv}' */
578 if (vect_print_dump_info (REPORT_DETAILS))
579 fprintf (vect_dump, "Create vector_inv.");
581 for (i = nunits - 1; i >= 0; --i)
583 t = tree_cons (NULL_TREE, def, t);
586 /* FIXME: use build_constructor directly. */
587 vec_inv = build_constructor_from_list (vectype, t);
588 return vect_init_vector (stmt, vec_inv);
591 /* Case 3: operand is defined inside the loop. */
595 *scalar_def = def_stmt;
597 /* Get the def from the vectorized stmt. */
598 def_stmt_info = vinfo_for_stmt (def_stmt);
599 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
600 gcc_assert (vec_stmt);
601 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
605 /* Case 4: operand is defined by a loop header phi - reduction */
606 case vect_reduction_def:
608 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
610 /* Get the def before the loop */
611 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
612 return get_initial_def_for_reduction (stmt, op, scalar_def);
615 /* Case 5: operand is defined by loop-header phi - induction. */
616 case vect_induction_def:
618 if (vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "induction - unsupported.")
620 internal_error ("no support for induction"); /* FORNOW */
/* NOTE(review): fragment -- lines 687, 690, 692, 694-696 and 703-706 are
   missing, including the return type, the else-branch structure, and the
   final return of VEC_OPRND.  */
629 /* Function vect_get_vec_def_for_stmt_copy
631 Return a vector-def for an operand. This function is used when the
632 vectorized stmt to be created (by the caller to this function) is a "copy"
633 created in case the vectorized result cannot fit in one vector, and several
634 copies of the vector-stmt are required. In this case the vector-def is
635 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
636 of the stmt that defines VEC_OPRND.
637 DT is the type of the vector def VEC_OPRND.
640 In case the vectorization factor (VF) is bigger than the number
641 of elements that can fit in a vectype (nunits), we have to generate
642 more than one vector stmt to vectorize the scalar stmt. This situation
643 arises when there are multiple data-types operated upon in the loop; the
644 smallest data-type determines the VF, and as a result, when vectorizing
645 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
646 vector stmt (each computing a vector of 'nunits' results, and together
647 computing 'VF' results in each iteration). This function is called when
648 vectorizing such a stmt (e.g. vectorizing S2 in the illusration below, in
649 which VF=16 and nuniti=4, so the number of copies required is 4):
651 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
653 S1: x = load VS1.0: vx.0 = memref0 VS1.1
654 VS1.1: vx.1 = memref1 VS1.2
655 VS1.2: vx.2 = memref2 VS1.3
656 VS1.3: vx.3 = memref3
658 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
659 VSnew.1: vz1 = vx.1 + ... VSnew.2
660 VSnew.2: vz2 = vx.2 + ... VSnew.3
661 VSnew.3: vz3 = vx.3 + ...
663 The vectorization of S1 is explained in vectorizable_load.
664 The vectorization of S2:
665 To create the first vector-stmt out of the 4 copies - VSnew.0 -
666 the function 'vect_get_vec_def_for_operand' is called to
667 get the relevant vector-def for each operand of S2. For operand x it
668 returns the vector-def 'vx.0'.
670 To create the remaining copies of the vector-stmt (VSnew.j), this
671 function is called to get the relevant vector-def for each operand. It is
672 obtained from the respective VS1.j stmt, which is recorded in the
673 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
675 For example, to obtain the vector-def 'vx.1' in order to create the
676 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
677 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
678 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
679 and return its def ('vx.1').
680 Overall, to create the above sequence this function will be called 3 times:
681 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
682 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
683 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
686 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
688 tree vec_stmt_for_operand;
689 stmt_vec_info def_stmt_info;
/* Invariant/constant defs are the same for every copy; for loop defs,
   hop to the next copy via STMT_VINFO_RELATED_STMT and take its LHS.  */
691 if (dt == vect_invariant_def || dt == vect_constant_def)
693 /* Do nothing; can reuse same def. */ ;
697 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
698 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
699 gcc_assert (def_stmt_info);
700 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
701 gcc_assert (vec_stmt_for_operand);
702 vec_oprnd = TREE_OPERAND (vec_stmt_for_operand, 0);
/* NOTE(review): fragment -- lines 712, 715, 718, 722, 724, 727-728, 731,
   734 and 736-737 are missing, including braces and the #else/#endif of
   the USE_MAPPED_LOCATION conditional.  */
708 /* Function vect_finish_stmt_generation.
710 Insert a new stmt. */
713 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
714 block_stmt_iterator *bsi)
716 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
717 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* Insert VEC_STMT before the scalar stmt at BSI and attach a fresh
   stmt_vec_info so later vectorizer queries recognize it.  */
719 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
720 set_stmt_info (get_stmt_ann (vec_stmt),
721 new_stmt_vec_info (vec_stmt, loop_vinfo));
723 if (vect_print_dump_info (REPORT_DETAILS))
725 fprintf (vect_dump, "add new stmt: ");
726 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
729 /* Make sure bsi points to the stmt that is being vectorized. */
730 gcc_assert (stmt == bsi_stmt (*bsi));
/* Propagate the scalar stmt's source location to the new stmt (two
   representations, selected by USE_MAPPED_LOCATION).  */
732 #ifdef USE_MAPPED_LOCATION
733 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
735 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
/* NOTE(review): fragment -- lines 787, 791, 794, 797-798, 800-805, 808,
   810, 814, 816, 821-827, 829-834, 837, 839, 842-843, 845, 848, 850,
   853 and 855 are missing, including the switch on CODE (the MIN/MAX
   and bit-and/or cases), local declarations (def, nelements, i), and
   several #else/#endif lines of the ADJUST_IN_EPILOG conditional.  */
740 #define ADJUST_IN_EPILOG 1
742 /* Function get_initial_def_for_reduction
745 STMT - a stmt that performs a reduction operation in the loop.
746 INIT_VAL - the initial value of the reduction variable
749 SCALAR_DEF - a tree that holds a value to be added to the final result
750 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
751 Return a vector variable, initialized according to the operation that STMT
752 performs. This vector will be used as the initial value of the
753 vector of partial results.
755 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
758 min/max: [init_val,init_val,..,init_val,init_val]
759 bit and/or: [init_val,init_val,..,init_val,init_val]
760 and when necessary (e.g. add/mult case) let the caller know
761 that it needs to adjust the result by init_val.
763 Option2: Initialize the vector as follows:
764 add: [0,0,...,0,init_val]
765 mult: [1,1,...,1,init_val]
766 min/max: [init_val,init_val,...,init_val]
767 bit and/or: [init_val,init_val,...,init_val]
768 and no adjustments are needed.
770 For example, for the following code:
776 STMT is 's = s + a[i]', and the reduction variable is 's'.
777 For a vector of 4 units, we want to return either [0,0,0,init_val],
778 or [0,0,0,0] and let the caller know that it needs to adjust
779 the result at the end by 'init_val'.
781 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
782 TODO: Use some cost-model to estimate which scheme is more profitable.
786 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
788 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
789 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
790 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
792 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
793 tree type = TREE_TYPE (init_val);
795 tree vec, t = NULL_TREE;
796 bool need_epilog_adjust;
/* Only integer and scalar-float reductions are handled.  */
799 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
/* DEF is the identity element (0) used to fill the vector lanes.  */
806 if (INTEGRAL_TYPE_P (type))
807 def = build_int_cst (type, 0);
809 def = build_real (type, dconst0);
811 #ifdef ADJUST_IN_EPILOG
812 /* All the 'nunits' elements are set to 0. The final result will be
813 adjusted by 'init_val' at the loop epilog. */
815 need_epilog_adjust = true;
817 /* 'nunits - 1' elements are set to 0; The last element is set to
818 'init_val'. No further adjustments at the epilog are needed. */
819 nelements = nunits - 1;
820 need_epilog_adjust = false;
828 need_epilog_adjust = false;
/* Build the element list back-to-front, optionally placing INIT_VAL in
   the last lane (Option2 shape).  */
835 for (i = nelements - 1; i >= 0; --i)
836 t = tree_cons (NULL_TREE, def, t);
838 if (nelements == nunits - 1)
840 /* Set the last element of the vector. */
841 t = tree_cons (NULL_TREE, init_val, t);
844 gcc_assert (nelements == nunits);
/* A VECTOR_CST is only legal when every element is a constant;
   otherwise emit a CONSTRUCTOR.  */
846 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
847 vec = build_vector (vectype, t);
849 vec = build_constructor_from_list (vectype, t);
851 if (!need_epilog_adjust)
852 *scalar_def = NULL_TREE;
854 *scalar_def = init_val;
856 return vect_init_vector (stmt, vec);
860 /* Function vect_create_epilog_for_reduction
862 Create code at the loop-epilog to finalize the result of a reduction
865 VECT_DEF is a vector of partial results.
866 REDUC_CODE is the tree-code for the epilog reduction.
867 STMT is the scalar reduction stmt that is being vectorized.
868 REDUCTION_PHI is the phi-node that carries the reduction computation.
871 1. Creates the reduction def-use cycle: sets the the arguments for
873 The loop-entry argument is the vectorized initial-value of the reduction.
874 The loop-latch argument is VECT_DEF - the vector of partial sums.
875 2. "Reduces" the vector of partial results VECT_DEF into a single result,
876 by applying the operation specified by REDUC_CODE if available, or by
877 other means (whole-vector shifts or a scalar loop).
878 The function also creates a new phi node at the loop exit to preserve
879 loop-closed form, as illustrated below.
881 The flow at the entry to this function:
884 vec_def = phi <null, null> # REDUCTION_PHI
885 VECT_DEF = vector_stmt # vectorized form of STMT
886 s_loop = scalar_stmt # (scalar) STMT
888 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
892 The above is transformed by this function into:
895 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
896 VECT_DEF = vector_stmt # vectorized form of STMT
897 s_loop = scalar_stmt # (scalar) STMT
899 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
900 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
901 v_out2 = reduce <v_out1>
902 s_out3 = extract_field <v_out2, 0>
903 s_out4 = adjust_result <s_out3>
909 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
910 enum tree_code reduc_code, tree reduction_phi)
912 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
914 enum machine_mode mode;
915 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
916 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
921 block_stmt_iterator exit_bsi;
926 tree new_scalar_dest, exit_phi;
927 tree bitsize, bitpos, bytesize;
928 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
929 tree scalar_initial_def;
930 tree vec_initial_def;
932 imm_use_iterator imm_iter;
934 bool extract_scalar_result;
938 tree operation = TREE_OPERAND (stmt, 1);
941 op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
942 reduction_op = TREE_OPERAND (operation, op_type-1);
943 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
944 mode = TYPE_MODE (vectype);
946 /*** 1. Create the reduction def-use cycle ***/
948 /* 1.1 set the loop-entry arg of the reduction-phi: */
949 /* For the case of reduction, vect_get_vec_def_for_operand returns
950 the scalar def before the loop, that defines the initial value
951 of the reduction variable. */
952 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
953 &scalar_initial_def);
954 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
956 /* 1.2 set the loop-latch arg for the reduction-phi: */
957 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
959 if (vect_print_dump_info (REPORT_DETAILS))
961 fprintf (vect_dump, "transform reduction: created def-use cycle:");
962 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
963 fprintf (vect_dump, "\n");
964 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
968 /*** 2. Create epilog code
969 The reduction epilog code operates across the elements of the vector
970 of partial results computed by the vectorized loop.
971 The reduction epilog code consists of:
972 step 1: compute the scalar result in a vector (v_out2)
973 step 2: extract the scalar result (s_out3) from the vector (v_out2)
974 step 3: adjust the scalar result (s_out3) if needed.
976 Step 1 can be accomplished using one the following three schemes:
977 (scheme 1) using reduc_code, if available.
978 (scheme 2) using whole-vector shifts, if available.
979 (scheme 3) using a scalar loop. In this case steps 1+2 above are
982 The overall epilog code looks like this:
984 s_out0 = phi <s_loop> # original EXIT_PHI
985 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
986 v_out2 = reduce <v_out1> # step 1
987 s_out3 = extract_field <v_out2, 0> # step 2
988 s_out4 = adjust_result <s_out3> # step 3
990 (step 3 is optional, and step2 1 and 2 may be combined).
991 Lastly, the uses of s_out0 are replaced by s_out4.
995 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
996 v_out1 = phi <v_loop> */
998 exit_bb = loop->single_exit->dest;
999 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
1000 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
1001 exit_bsi = bsi_start (exit_bb);
1003 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1004 (i.e. when reduc_code is not available) and in the final adjustment code
1005 (if needed). Also get the original scalar reduction variable as
1006 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1007 represents a reduction pattern), the tree-code and scalar-def are
1008 taken from the original stmt that the pattern-stmt (STMT) replaces.
1009 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1010 are taken from STMT. */
1012 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1015 /* Regular reduction */
1020 /* Reduction pattern */
1021 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
1022 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1023 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
1025 code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1026 scalar_dest = TREE_OPERAND (orig_stmt, 0);
1027 scalar_type = TREE_TYPE (scalar_dest);
1028 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
1029 bitsize = TYPE_SIZE (scalar_type);
1030 bytesize = TYPE_SIZE_UNIT (scalar_type);
1032 /* 2.3 Create the reduction code, using one of the three schemes described
1035 if (reduc_code < NUM_TREE_CODES)
1037 /*** Case 1: Create:
1038 v_out2 = reduc_expr <v_out1> */
1040 if (vect_print_dump_info (REPORT_DETAILS))
1041 fprintf (vect_dump, "Reduce using direct vector reduction.");
1043 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1044 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1045 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
1046 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1047 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1048 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1050 extract_scalar_result = true;
1054 enum tree_code shift_code = 0;
1055 bool have_whole_vector_shift = true;
1057 int element_bitsize = tree_low_cst (bitsize, 1);
1058 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1061 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1062 shift_code = VEC_RSHIFT_EXPR;
1064 have_whole_vector_shift = false;
1066 /* Regardless of whether we have a whole vector shift, if we're
1067 emulating the operation via tree-vect-generic, we don't want
1068 to use it. Only the first round of the reduction is likely
1069 to still be profitable via emulation. */
1070 /* ??? It might be better to emit a reduction tree code here, so that
1071 tree-vect-generic can expand the first round via bit tricks. */
1072 if (!VECTOR_MODE_P (mode))
1073 have_whole_vector_shift = false;
1076 optab optab = optab_for_tree_code (code, vectype);
1077 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
1078 have_whole_vector_shift = false;
1081 if (have_whole_vector_shift)
1083 /*** Case 2: Create:
1084 for (offset = VS/2; offset >= element_size; offset/=2)
1086 Create: va' = vec_shift <va, offset>
1087 Create: va = vop <va, va'>
1090 if (vect_print_dump_info (REPORT_DETAILS))
1091 fprintf (vect_dump, "Reduce using vector shifts");
1093 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1094 new_temp = PHI_RESULT (new_phi);
1096 for (bit_offset = vec_size_in_bits/2;
1097 bit_offset >= element_bitsize;
1100 tree bitpos = size_int (bit_offset);
1102 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1103 build2 (shift_code, vectype,
1105 new_name = make_ssa_name (vec_dest, epilog_stmt);
1106 TREE_OPERAND (epilog_stmt, 0) = new_name;
1107 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1109 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1110 build2 (code, vectype,
1111 new_name, new_temp));
1112 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1113 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1114 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1117 extract_scalar_result = true;
1123 /*** Case 3: Create:
1124 s = extract_field <v_out2, 0>
1125 for (offset = element_size;
1126 offset < vector_size;
1127 offset += element_size;)
1129 Create: s' = extract_field <v_out2, offset>
1130 Create: s = op <s, s'>
1133 if (vect_print_dump_info (REPORT_DETAILS))
1134 fprintf (vect_dump, "Reduce using scalar code. ");
1136 vec_temp = PHI_RESULT (new_phi);
1137 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1138 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1140 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1141 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1142 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1143 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1144 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1146 for (bit_offset = element_bitsize;
1147 bit_offset < vec_size_in_bits;
1148 bit_offset += element_bitsize)
1150 tree bitpos = bitsize_int (bit_offset);
1151 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1154 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1155 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1157 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1158 TREE_OPERAND (epilog_stmt, 0) = new_name;
1159 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1161 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1162 build2 (code, scalar_type, new_name, new_temp));
1163 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1164 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1165 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1168 extract_scalar_result = false;
1172 /* 2.4 Extract the final scalar result. Create:
1173 s_out3 = extract_field <v_out2, bitpos> */
1175 if (extract_scalar_result)
1179 if (vect_print_dump_info (REPORT_DETAILS))
1180 fprintf (vect_dump, "extract scalar result");
1182 if (BYTES_BIG_ENDIAN)
1183 bitpos = size_binop (MULT_EXPR,
1184 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1185 TYPE_SIZE (scalar_type));
1187 bitpos = bitsize_zero_node;
1189 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1190 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1191 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1192 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1193 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1194 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1197 /* 2.4 Adjust the final result by the initial value of the reduction
1198 variable. (When such adjustment is not needed, then
1199 'scalar_initial_def' is zero).
1202 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1204 if (scalar_initial_def)
1206 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1207 build2 (code, scalar_type, new_temp, scalar_initial_def));
1208 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1209 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1210 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1213 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1215 /* Find the loop-closed-use at the loop exit of the original scalar result.
1216 (The reduction result is expected to have two immediate uses - one at the
1217 latch block, and one at the loop exit). */
1219 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1221 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1223 exit_phi = USE_STMT (use_p);
1227 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1228 gcc_assert (exit_phi);
1229 /* Replace the uses: */
1230 orig_name = PHI_RESULT (exit_phi);
1231 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1232 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1233 SET_USE (use_p, new_temp);
/* NOTE(review): this extract is lossy -- the embedded original line numbers
   (1237, 1239, ...) are discontinuous, so statements, braces and branch
   conditions between them are missing from this view.  Added comments
   describe only what the retained lines show; consult the full
   tree-vect-transform.c before restructuring.  */
1237 /* Function vectorizable_reduction.
1239 Check if STMT performs a reduction operation that can be vectorized.
1240 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1241 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1242 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1244 This function also handles reduction idioms (patterns) that have been
1245 recognized in advance during vect_pattern_recog. In this case, STMT may be
1247 X = pattern_expr (arg0, arg1, ..., X)
1248 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1249 sequence that had been detected and replaced by the pattern-stmt (STMT).
1251 In some cases of reduction patterns, the type of the reduction variable X is
1252 different than the type of the other arguments of STMT.
1253 In such cases, the vectype that is used when transforming STMT into a vector
1254 stmt is different than the vectype that is used to determine the
1255 vectorization factor, because it consists of a different number of elements
1256 than the actual number of elements that are being operated upon in parallel.
1258 For example, consider an accumulation of shorts into an int accumulator.
1259 On some targets it's possible to vectorize this pattern operating on 8
1260 shorts at a time (hence, the vectype for purposes of determining the
1261 vectorization factor should be V8HI); on the other hand, the vectype that
1262 is used to create the vector form is actually V4SI (the type of the result).
1264 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1265 indicates what is the actual level of parallelism (V8HI in the example), so
1266 that the right vectorization factor would be derived. This vectype
1267 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1268 be used to create the vectorized stmt. The right vectype for the vectorized
1269 stmt is obtained from the type of the result X:
1270 get_vectype_for_scalar_type (TREE_TYPE (X))
1272 This means that, contrary to "regular" reductions (or "regular" stmts in
1273 general), the following equation:
1274 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1275 does *NOT* necessarily hold for reduction patterns. */
1278 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1283 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
1284 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1285 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1286 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1287 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* epilog_reduc_code is overwritten below; it is set to NUM_TREE_CODES when
   the target has no support for a direct epilog reduction.  */
1289 enum tree_code code, orig_code, epilog_reduc_code = 0;
1290 enum machine_mode vec_mode;
1292 optab optab, reduc_optab;
1293 tree new_temp = NULL_TREE;
1295 enum vect_def_type dt;
1300 stmt_vec_info orig_stmt_info;
1301 tree expr = NULL_TREE;
1303 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* ncopies = VF / nunits: how many vector stmts are needed to cover the
   vectorization factor (see the "unroll" comment before the copy loop).  */
1304 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1305 stmt_vec_info prev_stmt_info;
1307 tree new_stmt = NULL_TREE;
1310 gcc_assert (ncopies >= 1);
1312 /* 1. Is vectorizable reduction? */
1314 /* Not supportable if the reduction variable is used in the loop. */
1315 if (STMT_VINFO_RELEVANT_P (stmt_info))
1318 if (!STMT_VINFO_LIVE_P (stmt_info))
1321 /* Make sure it was already recognized as a reduction computation. */
1322 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1325 /* 2. Has this been recognized as a reduction pattern?
1327 Check if STMT represents a pattern that has been recognized
1328 in earlier analysis stages. For stmts that represent a pattern,
1329 the STMT_VINFO_RELATED_STMT field records the last stmt in
1330 the original sequence that constitutes the pattern. */
1332 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
/* The asserts below verify that the pattern/related-stmt links are mutually
   consistent.  NOTE(review): presumably guarded by an `if (orig_stmt)` that
   was dropped from this extract -- confirm against the full source.  */
1335 orig_stmt_info = vinfo_for_stmt (orig_stmt);
1336 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
1337 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
1338 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
1341 /* 3. Check the operands of the operation. The first operands are defined
1342 inside the loop body. The last operand is the reduction variable,
1343 which is defined by the loop-header-phi. */
1345 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
1347 operation = TREE_OPERAND (stmt, 1);
1348 code = TREE_CODE (operation);
1349 op_type = TREE_CODE_LENGTH (code);
/* Only binary and ternary reduction operations are handled.  */
1350 if (op_type != binary_op && op_type != ternary_op)
1352 scalar_dest = TREE_OPERAND (stmt, 0);
1353 scalar_type = TREE_TYPE (scalar_dest);
1355 /* All uses but the last are expected to be defined in the loop.
1356 The last use is the reduction variable. */
1357 for (i = 0; i < op_type-1; i++)
1359 op = TREE_OPERAND (operation, i);
1360 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1361 gcc_assert (is_simple_use);
1362 gcc_assert (dt == vect_loop_def || dt == vect_invariant_def ||
1363 dt == vect_constant_def);
/* The last operand is the reduction variable; it must be defined by a
   PHI node (the loop-header phi).  */
1366 op = TREE_OPERAND (operation, i);
1367 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1368 gcc_assert (is_simple_use);
1369 gcc_assert (dt == vect_reduction_def);
1370 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
/* NOTE(review): the two asserts below are alternatives -- one for the
   pattern-stmt case (orig_stmt), one for the regular case (stmt); the
   selecting condition was dropped from this extract.  */
1372 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
1374 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
1376 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
1379 /* 4. Supportable by target? */
1381 /* 4.1. check support for the operation in the loop */
1382 optab = optab_for_tree_code (code, vectype);
1385 if (vect_print_dump_info (REPORT_DETAILS))
1386 fprintf (vect_dump, "no optab.");
1389 vec_mode = TYPE_MODE (vectype);
1390 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1392 if (vect_print_dump_info (REPORT_DETAILS))
1393 fprintf (vect_dump, "op not supported by target.");
/* No direct vector insn: word-mode emulation is acceptable only when the
   mode fits a machine word and the VF clears the worthwhile threshold.  */
1394 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1395 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1396 < vect_min_worthwhile_factor (code))
1398 if (vect_print_dump_info (REPORT_DETAILS))
1399 fprintf (vect_dump, "proceeding using word mode.");
1402 /* Worthwhile without SIMD support? */
1403 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1404 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1405 < vect_min_worthwhile_factor (code))
1407 if (vect_print_dump_info (REPORT_DETAILS))
1408 fprintf (vect_dump, "not worthwhile without SIMD support.");
1412 /* 4.2. Check support for the epilog operation.
1414 If STMT represents a reduction pattern, then the type of the
1415 reduction variable may be different than the type of the rest
1416 of the arguments. For example, consider the case of accumulation
1417 of shorts into an int accumulator; The original code:
1418 S1: int_a = (int) short_a;
1419 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
1422 STMT: int_acc = widen_sum <short_a, int_acc>
1425 1. The tree-code that is used to create the vector operation in the
1426 epilog code (that reduces the partial results) is not the
1427 tree-code of STMT, but is rather the tree-code of the original
1428 stmt from the pattern that STMT is replacing. I.e, in the example
1429 above we want to use 'widen_sum' in the loop, but 'plus' in the
1431 2. The type (mode) we use to check available target support
1432 for the vector operation to be created in the *epilog*, is
1433 determined by the type of the reduction variable (in the example
1434 above we'd check this: plus_optab[vect_int_mode]).
1435 However the type (mode) we use to check available target support
1436 for the vector operation to be created *inside the loop*, is
1437 determined by the type of the other arguments to STMT (in the
1438 example we'd check this: widen_sum_optab[vect_short_mode]).
1440 This is contrary to "regular" reductions, in which the types of all
1441 the arguments are the same as the type of the reduction variable.
1442 For "regular" reductions we can therefore use the same vector type
1443 (and also the same tree-code) when generating the epilog code and
1444 when generating the code inside the loop. */
1448 /* This is a reduction pattern: get the vectype from the type of the
1449 reduction variable, and get the tree-code from orig_stmt. */
1450 orig_code = TREE_CODE (TREE_OPERAND (orig_stmt, 1));
1451 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
1452 vec_mode = TYPE_MODE (vectype);
1456 /* Regular reduction: use the same vectype and tree-code as used for
1457 the vector code inside the loop can be used for the epilog code. */
1461 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
1463 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
1466 if (vect_print_dump_info (REPORT_DETAILS))
1467 fprintf (vect_dump, "no optab for reduction.");
/* NUM_TREE_CODES acts as a sentinel meaning "no direct epilog reduction";
   vect_create_epilog_for_reduction tests reduc_code < NUM_TREE_CODES.  */
1468 epilog_reduc_code = NUM_TREE_CODES;
1470 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1472 if (vect_print_dump_info (REPORT_DETAILS))
1473 fprintf (vect_dump, "reduc op not supported by target.");
1474 epilog_reduc_code = NUM_TREE_CODES;
/* Analysis-only invocation: just record the stmt kind; code generation
   happens on a later call with vec_stmt non-NULL.  */
1477 if (!vec_stmt) /* transformation not required. */
1479 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
1485 if (vect_print_dump_info (REPORT_DETAILS))
1486 fprintf (vect_dump, "transform reduction.");
1488 /* Create the destination vector */
1489 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1491 /* Create the reduction-phi that defines the reduction-operand. */
1492 new_phi = create_phi_node (vec_dest, loop->header);
1494 /* In case the vectorization factor (VF) is bigger than the number
1495 of elements that we can fit in a vectype (nunits), we have to generate
1496 more than one vector stmt - i.e - we need to "unroll" the
1497 vector stmt by a factor VF/nunits. For more details see documentation
1498 in vectorizable_operation. */
1500 prev_stmt_info = NULL;
1501 for (j = 0; j < ncopies; j++)
/* First copy (j == 0): operands come from the scalar defs / the phi.  */
1506 op = TREE_OPERAND (operation, 0);
1507 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
1508 if (op_type == ternary_op)
1510 op = TREE_OPERAND (operation, 1);
1511 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1514 /* Get the vector def for the reduction variable from the phi node */
1515 reduc_def = PHI_RESULT (new_phi);
/* Subsequent copies (j > 0): chain operands through the defs created in
   the previous iteration.  */
1519 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
1520 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
1521 if (op_type == ternary_op)
1522 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
1524 /* Get the vector def for the reduction variable from the vectorized
1525 reduction operation generated in the previous iteration (j-1) */
1526 reduc_def = TREE_OPERAND (new_stmt ,0);
1529 /* Arguments are ready. create the new vector stmt. */
1531 if (op_type == binary_op)
1532 expr = build2 (code, vectype, loop_vec_def0, reduc_def)
1534 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
1536 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
1537 new_temp = make_ssa_name (vec_dest, new_stmt);
1538 TREE_OPERAND (new_stmt, 0) = new_temp;
1539 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1542 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1544 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1545 prev_stmt_info = vinfo_for_stmt (new_stmt);
1548 /* Finalize the reduction-phi (set it's arguments) and create the
1549 epilog reduction code. */
1550 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
/* NOTE(review): lossy extract -- the embedded original line numbers are
   discontinuous, so some statements/braces are missing from this view.  */
1555 /* Function vectorizable_assignment.
1557 Check if STMT performs an assignment (copy) that can be vectorized.
1558 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1559 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1560 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1563 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1569 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1570 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1571 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1574 enum vect_def_type dt;
1575 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1576 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1578 gcc_assert (ncopies >= 1);
/* Deliberate unconditional bail-out: vectorization of plain copies is
   currently disabled, so everything below this return is dead code kept
   for when the FORNOW restriction is lifted.  */
1580 return false; /* FORNOW */
1582 /* Is vectorizable assignment? */
1583 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1586 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1588 if (TREE_CODE (stmt) != MODIFY_EXPR)
1591 scalar_dest = TREE_OPERAND (stmt, 0);
1592 if (TREE_CODE (scalar_dest) != SSA_NAME)
1595 op = TREE_OPERAND (stmt, 1);
/* The copied value must have a recognizable def (loop-def, invariant or
   constant) for vectorization to proceed.  */
1596 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1598 if (vect_print_dump_info (REPORT_DETAILS))
1599 fprintf (vect_dump, "use not simple.");
/* Analysis-only invocation: record the stmt kind and stop.  */
1603 if (!vec_stmt) /* transformation not required. */
1605 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1610 if (vect_print_dump_info (REPORT_DETAILS))
1611 fprintf (vect_dump, "transform assignment.");
1614 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1617 op = TREE_OPERAND (stmt, 1);
1618 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1620 /* Arguments are ready. create the new vector stmt. */
1621 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1622 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1623 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1624 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1630 /* Function vect_min_worthwhile_factor.
1632 For a loop where we could vectorize the operation indicated by CODE,
1633 return the minimum vectorization factor that makes it worthwhile
1634 to use generic vectors. */
1636 vect_min_worthwhile_factor (enum tree_code code)
/* NOTE(review): the body (original lines 1637-1655) is missing from this
   extract; see the full file for the per-tree-code thresholds.  Callers in
   this file compare LOOP_VINFO_VECT_FACTOR against its return value.  */
/* NOTE(review): lossy extract -- the embedded original line numbers are
   discontinuous, so some statements/braces/else-branches are missing from
   this view.  Comments annotate only the retained lines.  */
1657 /* Function vectorizable_operation.
1659 Check if STMT performs a binary or unary operation that can be vectorized.
1660 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1661 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1662 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1665 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1670 tree op0, op1 = NULL;
1671 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
1672 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1673 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1674 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1675 enum tree_code code;
1676 enum machine_mode vec_mode;
1681 enum machine_mode optab_op2_mode;
1683 enum vect_def_type dt0, dt1;
1685 stmt_vec_info prev_stmt_info;
1686 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
/* ncopies = VF / nunits: number of vector stmts to generate (the "unroll"
   factor described in the big comment before the copy loop below).  */
1689 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
1692 gcc_assert (ncopies >= 1);
1694 /* Is STMT a vectorizable binary/unary operation? */
1695 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1698 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1700 if (STMT_VINFO_LIVE_P (stmt_info))
1702 /* FORNOW: not yet supported. */
1703 if (vect_print_dump_info (REPORT_DETAILS))
1704 fprintf (vect_dump, "value used after loop.");
1708 if (TREE_CODE (stmt) != MODIFY_EXPR)
1711 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1714 scalar_dest = TREE_OPERAND (stmt, 0);
/* FORNOW: input and output vectypes must have the same number of units
   (i.e. no widening/narrowing here -- those have dedicated routines).  */
1715 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
1716 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
1717 if (nunits_out != nunits_in)
1720 operation = TREE_OPERAND (stmt, 1);
1721 code = TREE_CODE (operation);
1722 optab = optab_for_tree_code (code, vectype);
1724 /* Support only unary or binary operations. */
1725 op_type = TREE_CODE_LENGTH (code);
1726 if (op_type != unary_op && op_type != binary_op)
1728 if (vect_print_dump_info (REPORT_DETAILS))
1729 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
1733 op0 = TREE_OPERAND (operation, 0);
1734 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
1736 if (vect_print_dump_info (REPORT_DETAILS))
1737 fprintf (vect_dump, "use not simple.");
1741 if (op_type == binary_op)
1743 op1 = TREE_OPERAND (operation, 1);
1744 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
1746 if (vect_print_dump_info (REPORT_DETAILS))
1747 fprintf (vect_dump, "use not simple.");
1752 /* Supportable by target? */
1755 if (vect_print_dump_info (REPORT_DETAILS))
1756 fprintf (vect_dump, "no optab.");
1759 vec_mode = TYPE_MODE (vectype);
/* icode is kept for the shift-operand mode check below
   (insn_data[icode].operand[2].mode).  */
1760 icode = (int) optab->handlers[(int) vec_mode].insn_code;
1761 if (icode == CODE_FOR_nothing)
1763 if (vect_print_dump_info (REPORT_DETAILS))
1764 fprintf (vect_dump, "op not supported by target.");
/* No direct vector insn: word-mode emulation is acceptable only when the
   mode fits a machine word and the VF clears the worthwhile threshold.  */
1765 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1766 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1767 < vect_min_worthwhile_factor (code))
1769 if (vect_print_dump_info (REPORT_DETAILS))
1770 fprintf (vect_dump, "proceeding using word mode.");
1773 /* Worthwhile without SIMD support? */
1774 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1775 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1776 < vect_min_worthwhile_factor (code))
1778 if (vect_print_dump_info (REPORT_DETAILS))
1779 fprintf (vect_dump, "not worthwhile without SIMD support.")
1783 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1785 /* FORNOW: not yet supported. */
1786 if (!VECTOR_MODE_P (vec_mode))
1789 /* Invariant argument is needed for a vector shift
1790 by a scalar shift operand. */
1791 optab_op2_mode = insn_data[icode].operand[2].mode;
1792 if (! (VECTOR_MODE_P (optab_op2_mode)
1793 || dt1 == vect_constant_def
1794 || dt1 == vect_invariant_def))
1796 if (vect_print_dump_info (REPORT_DETAILS))
1797 fprintf (vect_dump, "operand mode requires invariant argument.");
/* Analysis-only invocation: record the stmt kind and stop.  */
1802 if (!vec_stmt) /* transformation not required. */
1804 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
1810 if (vect_print_dump_info (REPORT_DETAILS))
1811 fprintf (vect_dump, "transform binary/unary operation.");
1814 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1816 /* In case the vectorization factor (VF) is bigger than the number
1817 of elements that we can fit in a vectype (nunits), we have to generate
1818 more than one vector stmt - i.e - we need to "unroll" the
1819 vector stmt by a factor VF/nunits. In doing so, we record a pointer
1820 from one copy of the vector stmt to the next, in the field
1821 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
1822 stages to find the correct vector defs to be used when vectorizing
1823 stmts that use the defs of the current stmt. The example below illustrates
1824 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
1825 4 vectorized stmts):
1827 before vectorization:
1828 RELATED_STMT VEC_STMT
1832 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
1834 RELATED_STMT VEC_STMT
1835 VS1_0: vx0 = memref0 VS1_1 -
1836 VS1_1: vx1 = memref1 VS1_2 -
1837 VS1_2: vx2 = memref2 VS1_3 -
1838 VS1_3: vx3 = memref3 - -
1839 S1: x = load - VS1_0
1842 step2: vectorize stmt S2 (done here):
1843 To vectorize stmt S2 we first need to find the relevant vector
1844 def for the first operand 'x'. This is, as usual, obtained from
1845 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
1846 that defines 'x' (S1). This way we find the stmt VS1_0, and the
1847 relevant vector def 'vx0'. Having found 'vx0' we can generate
1848 the vector stmt VS2_0, and as usual, record it in the
1849 STMT_VINFO_VEC_STMT of stmt S2.
1850 When creating the second copy (VS2_1), we obtain the relevant vector
1851 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
1852 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
1853 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
1854 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
1855 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
1856 chain of stmts and pointers:
1857 RELATED_STMT VEC_STMT
1858 VS1_0: vx0 = memref0 VS1_1 -
1859 VS1_1: vx1 = memref1 VS1_2 -
1860 VS1_2: vx2 = memref2 VS1_3 -
1861 VS1_3: vx3 = memref3 - -
1862 S1: x = load - VS1_0
1863 VS2_0: vz0 = vx0 + v1 VS2_1 -
1864 VS2_1: vz1 = vx1 + v1 VS2_2 -
1865 VS2_2: vz2 = vx2 + v1 VS2_3 -
1866 VS2_3: vz3 = vx3 + v1 - -
1867 S2: z = x + 1 - VS2_0 */
1869 prev_stmt_info = NULL;
1870 for (j = 0; j < ncopies; j++)
/* First copy (j == 0): fetch vector defs from the scalar operands.  */
1875 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1876 if (op_type == binary_op)
1878 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1880 /* Vector shl and shr insn patterns can be defined with
1881 scalar operand 2 (shift operand). In this case, use
1882 constant or loop invariant op1 directly, without
1883 extending it to vector mode first. */
1884 optab_op2_mode = insn_data[icode].operand[2].mode;
1885 if (!VECTOR_MODE_P (optab_op2_mode))
1887 if (vect_print_dump_info (REPORT_DETAILS))
1888 fprintf (vect_dump, "operand 1 using scalar mode.");
1893 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
/* Subsequent copies (j > 0): chain through the defs of the previous copy.  */
1898 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
1899 if (op_type == binary_op)
1900 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
1903 /* Arguments are ready. create the new vector stmt. */
1905 if (op_type == binary_op)
1906 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest,
1907 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1909 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest,
1910 build1 (code, vectype, vec_oprnd0));
1911 new_temp = make_ssa_name (vec_dest, new_stmt);
1912 TREE_OPERAND (new_stmt, 0) = new_temp;
1913 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1916 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
1918 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
1919 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* NOTE(review): lossy extract -- the embedded original line numbers are
   discontinuous, so some statements/braces are missing from this view.  */
1926 /* Function vectorizable_type_demotion
1928 Check if STMT performs a binary or unary operation that involves
1929 type demotion, and if it can be vectorized.
1930 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1931 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1932 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1935 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
1942 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
1943 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1944 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1945 enum tree_code code;
1948 enum vect_def_type dt0;
1950 stmt_vec_info prev_stmt_info;
1960 enum machine_mode vec_mode;
1962 /* Is STMT a vectorizable type-demotion operation? */
1964 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1967 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1969 if (STMT_VINFO_LIVE_P (stmt_info))
1971 /* FORNOW: not yet supported. */
1972 if (vect_print_dump_info (REPORT_DETAILS))
1973 fprintf (vect_dump, "value used after loop.");
1977 if (TREE_CODE (stmt) != MODIFY_EXPR)
1980 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Demotion is only recognized as a conversion (NOP/CONVERT) whose source
   type is wider than its destination type.  */
1983 operation = TREE_OPERAND (stmt, 1);
1984 code = TREE_CODE (operation);
1985 if (code != NOP_EXPR && code != CONVERT_EXPR)
1988 op0 = TREE_OPERAND (operation, 0);
1989 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
1990 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
1992 scalar_dest = TREE_OPERAND (stmt, 0);
1993 scalar_type = TREE_TYPE (scalar_dest);
1994 vectype_out = get_vectype_for_scalar_type (scalar_type);
1995 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* FORNOW: only exactly-2x narrowing is handled (two input vectors pack
   into one output vector).  */
1996 if (nunits_in != nunits_out / 2) /* FORNOW */
1999 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2000 gcc_assert (ncopies >= 1);
2002 /* Check the operands of the operation. */
2003 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2005 if (vect_print_dump_info (REPORT_DETAILS))
2006 fprintf (vect_dump, "use not simple.");
2010 /* Supportable by target? */
/* VEC_PACK_MOD_EXPR packs two source vectors (modulo narrowing) into one
   destination vector; check the target supports it in the input mode.  */
2011 code = VEC_PACK_MOD_EXPR;
2012 optab = optab_for_tree_code (VEC_PACK_MOD_EXPR, vectype_in);
2016 vec_mode = TYPE_MODE (vectype_in);
2017 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2020 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
/* Analysis-only invocation: record the stmt kind and stop.  */
2022 if (!vec_stmt) /* transformation not required. */
2024 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
2030 if (vect_print_dump_info (REPORT_DETAILS))
2031 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
2035 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2037 /* In case the vectorization factor (VF) is bigger than the number
2038 of elements that we can fit in a vectype (nunits), we have to generate
2039 more than one vector stmt - i.e - we need to "unroll" the
2040 vector stmt by a factor VF/nunits. */
2041 prev_stmt_info = NULL;
2042 for (j = 0; j < ncopies; j++)
/* Each copy consumes TWO input vectors; j == 0 fetches the first pair,
   j > 0 chains through the defs of the previous pair.  */
2047 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2048 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2049 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd0);
2053 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2054 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2057 /* Arguments are ready. Create the new vector stmt. */
2058 expr = build2 (code, vectype_out, vec_oprnd0, vec_oprnd1);
2059 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
2060 new_temp = make_ssa_name (vec_dest, new_stmt);
2061 TREE_OPERAND (new_stmt, 0) = new_temp;
2062 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2065 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2067 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2069 prev_stmt_info = vinfo_for_stmt (new_stmt);
2072 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
/* NOTE(review): lossy extract -- the embedded original line numbers are
   discontinuous, so some statements/braces are missing from this view.  */
2077 /* Function vect_gen_widened_results_half
2079 Create a vector stmt whose code, type, number of arguments, and result
2080 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2081 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2082 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2083 needs to be created (DECL is a function-decl of a target-builtin).
2084 STMT is the original scalar stmt that we are vectorizing. */
2087 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2088 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2089 tree vec_dest, block_stmt_iterator *bsi,
2099 /* Generate half of the widened result: */
2100 if (code == CALL_EXPR)
2102 /* Target specific support */
/* Build the builtin call's argument list; tree_cons prepends, so for a
   binary op the list ends up as (vec_oprnd1, vec_oprnd0).  */
2103 vec_params = build_tree_list (NULL_TREE, vec_oprnd0);
2104 if (op_type == binary_op)
2105 vec_params = tree_cons (NULL_TREE, vec_oprnd1, vec_params);
2106 expr = build_function_call_expr (decl, vec_params);
2110 /* Generic support */
2111 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2112 if (op_type == binary_op)
2113 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2115 expr = build1 (code, vectype, vec_oprnd0);
2117 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, expr);
2118 new_temp = make_ssa_name (vec_dest, new_stmt);
2119 TREE_OPERAND (new_stmt, 0) = new_temp;
2120 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* A builtin call may clobber memory: queue every virtual operand of the
   new stmt for SSA renaming so the virtual SSA web stays consistent.  */
2122 if (code == CALL_EXPR)
2124 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2126 if (TREE_CODE (sym) == SSA_NAME)
2127 sym = SSA_NAME_VAR (sym);
2128 mark_sym_for_renaming (sym);
2136 /* Function vectorizable_type_promotion
2138 Check if STMT performs a binary or unary operation that involves
2139 type promotion, and if it can be vectorized.
2140 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2141 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2142 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2145 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
2151 tree op0, op1 = NULL;
2152 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
2153 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2154 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2155 enum tree_code code, code1 = CODE_FOR_nothing, code2 = CODE_FOR_nothing;
2156 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2159 enum vect_def_type dt0, dt1;
2161 stmt_vec_info prev_stmt_info;
2169 /* Is STMT a vectorizable type-promotion operation? */
2171 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2174 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2176 if (STMT_VINFO_LIVE_P (stmt_info))
2178 /* FORNOW: not yet supported. */
2179 if (vect_print_dump_info (REPORT_DETAILS))
2180 fprintf (vect_dump, "value used after loop.");
2184 if (TREE_CODE (stmt) != MODIFY_EXPR)
2187 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
/* Only widening conversions (NOP_EXPR) and widening multiplies are
   recognized as promotions.  */
2190 operation = TREE_OPERAND (stmt, 1);
2191 code = TREE_CODE (operation);
2192 if (code != NOP_EXPR && code != WIDEN_MULT_EXPR)
/* NCOPIES is computed from the *input* vector type: the narrow input
   determines how many vector stmts cover one vectorization factor.  */
2195 op0 = TREE_OPERAND (operation, 0);
2196 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
2197 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2198 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2199 gcc_assert (ncopies >= 1);
/* FORNOW: the output elements must be exactly twice as wide, so the
   output vector holds half as many elements as the input vector.  */
2201 scalar_dest = TREE_OPERAND (stmt, 0);
2202 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
2203 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2204 if (nunits_out != nunits_in / 2) /* FORNOW */
2207 /* Check the operands of the operation. */
2208 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2210 if (vect_print_dump_info (REPORT_DETAILS))
2211 fprintf (vect_dump, "use not simple.");
2215 op_type = TREE_CODE_LENGTH (code);
2216 if (op_type == binary_op)
2218 op1 = TREE_OPERAND (operation, 1);
2219 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt1))
2221 if (vect_print_dump_info (REPORT_DETAILS))
2222 fprintf (vect_dump, "use not simple.");
2227 /* Supportable by target? The target reports two codes (or builtin
2228 decls): one producing the low half and one the high half. */
2228 if (!supportable_widening_operation (code, stmt, vectype_in,
2229 &decl1, &decl2, &code1, &code2))
2232 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2234 if (!vec_stmt) /* transformation not required. */
2236 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
2242 if (vect_print_dump_info (REPORT_DETAILS))
2243 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
2247 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2249 /* In case the vectorization factor (VF) is bigger than the number
2250 of elements that we can fit in a vectype (nunits), we have to generate
2251 more than one vector stmt - i.e - we need to "unroll" the
2252 vector stmt by a factor VF/nunits. */
2254 prev_stmt_info = NULL;
2255 for (j = 0; j < ncopies; j++)
/* First copy uses the operand defs directly; later copies chain through
   vect_get_vec_def_for_stmt_copy.  */
2260 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2261 if (op_type == binary_op)
2262 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
2266 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2267 if (op_type == binary_op)
2268 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt1, vec_oprnd1);
2271 /* Arguments are ready. Create the new vector stmt. We are creating
2272 two vector defs because the widened result does not fit in one vector.
2273 The vectorized stmt can be expressed as a call to a target builtin,
2274 or using a tree-code. */
2275 /* Generate first half of the widened result: */
2276 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
2277 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2279 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2281 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2282 prev_stmt_info = vinfo_for_stmt (new_stmt);
2284 /* Generate second half of the widened result: */
2285 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
2286 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
2287 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2288 prev_stmt_info = vinfo_for_stmt (new_stmt);
2292 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2297 /* Function vectorizable_store.
2299 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
2301 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2302 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2303 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2306 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2311 tree vec_oprnd = NULL_TREE;
2312 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2313 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2314 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2315 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2316 enum machine_mode vec_mode;
2318 enum dr_alignment_support alignment_support_cheme;
2320 def_operand_p def_p;
2322 enum vect_def_type dt;
2323 stmt_vec_info prev_stmt_info;
2324 tree dataref_ptr = NULL_TREE;
2325 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2326 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2329 gcc_assert (ncopies >= 1);
2331 /* Is vectorizable store? */
2333 if (TREE_CODE (stmt) != MODIFY_EXPR)
2336 scalar_dest = TREE_OPERAND (stmt, 0);
2337 if (TREE_CODE (scalar_dest) != ARRAY_REF
2338 && TREE_CODE (scalar_dest) != INDIRECT_REF)
/* The stored value must be a simple use (loop-invariant, induction,
   or defined by a vectorizable stmt in the loop).  */
2341 op = TREE_OPERAND (stmt, 1);
2342 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2344 if (vect_print_dump_info (REPORT_DETAILS))
2345 fprintf (vect_dump, "use not simple.");
2349 vec_mode = TYPE_MODE (vectype);
2350 /* FORNOW. In some cases can vectorize even if data-type not supported
2351 (e.g. - array initialization with 0). */
2352 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2355 if (!STMT_VINFO_DATA_REF (stmt_info))
2359 if (!vec_stmt) /* transformation not required. */
2361 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2367 if (vect_print_dump_info (REPORT_DETAILS))
2368 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
/* FORNOW: only aligned stores are handled (enforced by the assert).  */
2370 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2371 gcc_assert (alignment_support_cheme);
2372 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
2374 /* In case the vectorization factor (VF) is bigger than the number
2375 of elements that we can fit in a vectype (nunits), we have to generate
2376 more than one vector stmt - i.e - we need to "unroll" the
2377 vector stmt by a factor VF/nunits. For more details see documentation in
2378 vect_get_vec_def_for_copy_stmt. */
2380 prev_stmt_info = NULL;
2381 for (j = 0; j < ncopies; j++)
/* First copy creates the vector operand and data-ref pointer; later
   copies chain the operand and bump the pointer by one vector size.  */
2388 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2389 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy,
2394 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, vec_oprnd);
2395 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2398 /* Arguments are ready. create the new vector stmt. */
2399 data_ref = build_fold_indirect_ref (dataref_ptr);
2400 new_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd);
2401 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2403 /* Set the V_MAY_DEFS for the vector pointer. If this virtual def has a
2404 use outside the loop and a loop peel is performed then the def may be
2405 renamed by the peel. Mark it for renaming so the later use will also
2407 copy_virtual_operands (new_stmt, stmt);
2410 /* The original store is deleted so the same SSA_NAMEs can be used.
2412 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
2414 SSA_NAME_DEF_STMT (def) = new_stmt;
2415 mark_sym_for_renaming (SSA_NAME_VAR (def));
2418 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2422 /* Create new names for all the definitions created by COPY and
2423 add replacement mappings for each new name. */
2424 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VMAYDEF)
2426 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
2427 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
2430 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2433 prev_stmt_info = vinfo_for_stmt (new_stmt);
2440 /* Function vect_setup_realignment
2442 This function is called when vectorizing an unaligned load using
2443 the dr_unaligned_software_pipeline scheme.
2444 This function generates the following code at the loop prolog:
2447 msq_init = *(floor(p)); # prolog load
2448 realignment_token = call target_builtin;
2450 msq = phi (msq_init, ---)
2452 The code above sets up a new (vector) pointer, pointing to the first
2453 location accessed by STMT, and a "floor-aligned" load using that pointer.
2454 It also generates code to compute the "realignment-token" (if the relevant
2455 target hook was defined), and creates a phi-node at the loop-header bb
2456 whose arguments are the result of the prolog-load (created by this
2457 function) and the result of a load that takes place in the loop (to be
2458 created by the caller to this function).
2459 The caller to this function uses the phi-result (msq) to create the
2460 realignment code inside the loop, and sets up the missing phi argument,
2464 msq = phi (msq_init, lsq)
2465 lsq = *(floor(p')); # load in loop
2466 result = realign_load (msq, lsq, realignment_token);
2469 STMT - (scalar) load stmt to be vectorized. This load accesses
2470 a memory location that may be unaligned.
2471 BSI - place where new code is to be inserted.
2474 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2475 target hook, if defined.
2476 Return value - the result of the loop-header phi node.
2480 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
2481 tree *realignment_token)
2483 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2484 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2485 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2486 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2487 edge pe = loop_preheader_edge (loop);
2488 tree scalar_dest = TREE_OPERAND (stmt, 0);
2501 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
2502 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2503 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true);
2504 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2505 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2506 new_temp = make_ssa_name (vec_dest, new_stmt);
2507 TREE_OPERAND (new_stmt, 0) = new_temp;
/* The prolog load goes on the preheader edge; the edge must not be
   split (asserted by !new_bb).  */
2508 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2509 gcc_assert (!new_bb);
2510 msq_init = TREE_OPERAND (new_stmt, 0);
2511 copy_virtual_operands (new_stmt, stmt);
2512 update_vuses_to_preheader (new_stmt, loop);
2514 /* 2. Create permutation mask, if required, in loop preheader. */
2515 if (targetm.vectorize.builtin_mask_for_load)
2518 tree params = build_tree_list (NULL_TREE, init_addr);
2520 vec_dest = vect_create_destination_var (scalar_dest,
2521 TREE_TYPE (new_stmt));
2522 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2523 new_stmt = build_function_call_expr (builtin_decl, params);
2524 new_stmt = build2 (MODIFY_EXPR, void_type_node, vec_dest, new_stmt);
2525 new_temp = make_ssa_name (vec_dest, new_stmt);
2526 TREE_OPERAND (new_stmt, 0) = new_temp;
2527 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2528 gcc_assert (!new_bb);
2529 *realignment_token = TREE_OPERAND (new_stmt, 0);
2531 /* The result of the CALL_EXPR to this builtin is determined from
2532 the value of the parameter and no global variables are touched
2533 which makes the builtin a "const" function. Requiring the
2534 builtin to have the "const" attribute makes it unnecessary
2535 to call mark_call_clobbered. */
2536 gcc_assert (TREE_READONLY (builtin_decl));
2539 /* 3. Create msq = phi <msq_init, lsq> in loop */
2540 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2541 msq = make_ssa_name (vec_dest, NULL_TREE);
2542 phi_stmt = create_phi_node (msq, loop->header);
2543 SSA_NAME_DEF_STMT (msq) = phi_stmt;
/* Only the preheader argument is set here; the latch argument (lsq)
   is filled in later by the caller (see vectorizable_load).  */
2544 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2550 /* vectorizable_load.
2552 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
2554 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2555 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2556 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2559 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2562 tree vec_dest = NULL;
2563 tree data_ref = NULL;
2565 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2566 stmt_vec_info prev_stmt_info;
2567 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2568 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2569 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2570 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2575 enum dr_alignment_support alignment_support_cheme;
2576 tree dataref_ptr = NULL_TREE;
2578 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2579 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2581 tree msq = NULL_TREE, lsq;
2582 tree offset = NULL_TREE;
2583 tree realignment_token = NULL_TREE;
2584 tree phi_stmt = NULL_TREE;
2586 /* Is vectorizable load? */
2587 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2590 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2592 if (STMT_VINFO_LIVE_P (stmt_info))
2594 /* FORNOW: not yet supported. */
2595 if (vect_print_dump_info (REPORT_DETAILS))
2596 fprintf (vect_dump, "value used after loop.");
2600 if (TREE_CODE (stmt) != MODIFY_EXPR)
2603 scalar_dest = TREE_OPERAND (stmt, 0);
2604 if (TREE_CODE (scalar_dest) != SSA_NAME)
2607 op = TREE_OPERAND (stmt, 1);
2608 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2611 if (!STMT_VINFO_DATA_REF (stmt_info))
2614 mode = (int) TYPE_MODE (vectype);
2616 /* FORNOW. In some cases can vectorize even if data-type not supported
2617 (e.g. - data copies). */
2618 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2620 if (vect_print_dump_info (REPORT_DETAILS))
2621 fprintf (vect_dump, "Aligned load, but unsupported type.");
2625 if (!vec_stmt) /* transformation not required. */
2627 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2633 if (vect_print_dump_info (REPORT_DETAILS))
2634 fprintf (vect_dump, "transform load.");
2636 alignment_support_cheme = vect_supportable_dr_alignment (dr);
2637 gcc_assert (alignment_support_cheme);
2639 /* In case the vectorization factor (VF) is bigger than the number
2640 of elements that we can fit in a vectype (nunits), we have to generate
2641 more than one vector stmt - i.e - we need to "unroll" the
2642 vector stmt by a factor VF/nunits. In doing so, we record a pointer
2643 from one copy of the vector stmt to the next, in the field
2644 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
2645 stages to find the correct vector defs to be used when vectorizing
2646 stmts that use the defs of the current stmt. The example below illustrates
2647 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
2648 4 vectorized stmts):
2650 before vectorization:
2651 RELATED_STMT VEC_STMT
2655 step 1: vectorize stmt S1:
2656 We first create the vector stmt VS1_0, and, as usual, record a
2657 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
2658 Next, we create the vector stmt VS1_1, and record a pointer to
2659 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
2660 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
2662 RELATED_STMT VEC_STMT
2663 VS1_0: vx0 = memref0 VS1_1 -
2664 VS1_1: vx1 = memref1 VS1_2 -
2665 VS1_2: vx2 = memref2 VS1_3 -
2666 VS1_3: vx3 = memref3 - -
2667 S1: x = load - VS1_0
2670 See in documentation in vect_get_vec_def_for_stmt_copy for how the
2671 information we recorded in RELATED_STMT field is used to vectorize
2674 /* If the data reference is aligned (dr_aligned) or potentially unaligned
2675 on a target that supports unaligned accesses (dr_unaligned_supported)
2676 we generate the following code:
2680 p = p + indx * vectype_size;
2685 Otherwise, the data reference is potentially unaligned on a target that
2686 does not support unaligned accesses (dr_unaligned_software_pipeline) -
2687 then generate the following code, in which the data in each iteration is
2688 obtained by two vector loads, one from the previous iteration, and one
2689 from the current iteration:
2691 msq_init = *(floor(p1))
2692 p2 = initial_addr + VS - 1;
2693 realignment_token = call target_builtin;
2696 p2 = p2 + indx * vectype_size
2698 vec_dest = realign_load (msq, lsq, realignment_token)
/* For the software-pipelined scheme, emit the prolog (floor-aligned load,
   realignment token, and loop-header phi) before the main copy loop.
   OFFSET of nunits-1 makes the in-loop pointer address the "ceiling"
   vector of the access.  */
2704 if (alignment_support_cheme == dr_unaligned_software_pipeline)
2706 msq = vect_setup_realignment (stmt, bsi, &realignment_token);
2707 phi_stmt = SSA_NAME_DEF_STMT (msq);
2708 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
2711 prev_stmt_info = NULL;
2712 for (j = 0; j < ncopies; j++)
2714 /* 1. Create the vector pointer update chain. */
2716 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset,
2717 &dummy, &ptr_incr, false);
2719 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
2721 /* 2. Create the vector-load in the loop. */
2722 switch (alignment_support_cheme)
2725 gcc_assert (aligned_access_p (dr));
2726 data_ref = build_fold_indirect_ref (dataref_ptr);
2728 case dr_unaligned_supported:
2730 int mis = DR_MISALIGNMENT (dr);
2731 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2733 gcc_assert (!aligned_access_p (dr));
2734 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
2736 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
2739 case dr_unaligned_software_pipeline:
2740 gcc_assert (!aligned_access_p (dr));
2741 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2746 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2747 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2748 new_temp = make_ssa_name (vec_dest, new_stmt);
2749 TREE_OPERAND (new_stmt, 0) = new_temp;
2750 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2751 copy_virtual_operands (new_stmt, stmt);
2752 mark_new_vars_to_rename (new_stmt);
2754 /* 3. Handle explicit realignment if necessary/supported. */
2755 if (alignment_support_cheme == dr_unaligned_software_pipeline)
2758 <vec_dest = realign_load (msq, lsq, realignment_token)> */
2759 lsq = TREE_OPERAND (new_stmt, 0);
/* If the target defined no mask builtin, the token defaults to the
   data-ref pointer itself.  */
2760 if (!realignment_token)
2761 realignment_token = dataref_ptr;
2762 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2764 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
2765 new_stmt = build2 (MODIFY_EXPR, vectype, new_stmt);
2766 new_temp = make_ssa_name (vec_dest, new_stmt);
2767 TREE_OPERAND (new_stmt, 0) = new_temp;
2768 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* On the last copy, complete the header phi created by
   vect_setup_realignment with the latch-edge argument.  */
2769 if (j == ncopies - 1)
2770 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2775 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2777 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2778 prev_stmt_info = vinfo_for_stmt (new_stmt);
2785 /* Function vectorizable_live_operation.
2787 STMT computes a value that is used outside the loop. Check if
2788 it can be supported. */
2791 vectorizable_live_operation (tree stmt,
2792 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
2793 tree *vec_stmt ATTRIBUTE_UNUSED)
2796 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2797 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2799 enum tree_code code;
2803 enum vect_def_type dt;
2805 if (!STMT_VINFO_LIVE_P (stmt_info))
2808 if (TREE_CODE (stmt) != MODIFY_EXPR)
2811 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2814 operation = TREE_OPERAND (stmt, 1);
2815 code = TREE_CODE (operation);
2817 op_type = TREE_CODE_LENGTH (code);
2819 /* FORNOW: support only if all uses are invariant. This means
2820 that the scalar operations can remain in place, unvectorized.
2821 The original last scalar value that they compute will be used. */
2823 for (i = 0; i < op_type; i++)
2825 op = TREE_OPERAND (operation, i);
2826 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
2828 if (vect_print_dump_info (REPORT_DETAILS))
2829 fprintf (vect_dump, "use not simple.");
/* Any operand that is neither loop-invariant nor constant makes the
   live operation unsupported.  */
2833 if (dt != vect_invariant_def && dt != vect_constant_def)
2837 /* No transformation is required for the cases we currently support. */
2842 /* Function vect_is_simple_cond.
2845 LOOP - the loop that is being vectorized.
2846 COND - Condition that is checked for simple use.
2848 Returns whether a COND can be vectorized. Checks whether
2849 condition operands are supportable using vect_is_simple_use. */
2852 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
2856 enum vect_def_type dt;
2858 if (!COMPARISON_CLASS_P (cond))
2861 lhs = TREE_OPERAND (cond, 0);
2862 rhs = TREE_OPERAND (cond, 1);
/* Each comparison operand must be either a simple-use SSA name, or an
   integer/real constant.  */
2864 if (TREE_CODE (lhs) == SSA_NAME)
2866 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
2867 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
2870 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
2873 if (TREE_CODE (rhs) == SSA_NAME)
2875 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
2876 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
2879 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
2885 /* vectorizable_condition.
2887 Check if STMT is conditional modify expression that can be vectorized.
2888 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2889 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
2892 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2895 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2897 tree scalar_dest = NULL_TREE;
2898 tree vec_dest = NULL_TREE;
2899 tree op = NULL_TREE;
2900 tree cond_expr, then_clause, else_clause;
2901 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2902 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2903 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
2904 tree vec_compare, vec_cond_expr;
2906 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2907 enum machine_mode vec_mode;
2909 enum vect_def_type dt;
2910 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2911 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2913 gcc_assert (ncopies >= 1);
/* FORNOW early return; its guarding condition is not visible in this
   view -- presumably it rejects ncopies > 1 (multi-copy conditions).
   TODO(review): confirm against the full source.  */
2915 return false; /* FORNOW */
2917 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2920 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2922 if (STMT_VINFO_LIVE_P (stmt_info))
2924 /* FORNOW: not yet supported. */
2925 if (vect_print_dump_info (REPORT_DETAILS))
2926 fprintf (vect_dump, "value used after loop.");
2930 if (TREE_CODE (stmt) != MODIFY_EXPR)
2933 op = TREE_OPERAND (stmt, 1);
2935 if (TREE_CODE (op) != COND_EXPR)
2938 cond_expr = TREE_OPERAND (op, 0);
2939 then_clause = TREE_OPERAND (op, 1);
2940 else_clause = TREE_OPERAND (op, 2);
2942 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
2945 /* We do not handle two different vector types for the condition
2947 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
/* THEN/ELSE clauses must each be a simple-use SSA name or an
   integer/real constant, mirroring vect_is_simple_cond.  */
2950 if (TREE_CODE (then_clause) == SSA_NAME)
2952 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2953 if (!vect_is_simple_use (then_clause, loop_vinfo,
2954 &then_def_stmt, &def, &dt))
2957 else if (TREE_CODE (then_clause) != INTEGER_CST
2958 && TREE_CODE (then_clause) != REAL_CST)
2961 if (TREE_CODE (else_clause) == SSA_NAME)
2963 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2964 if (!vect_is_simple_use (else_clause, loop_vinfo,
2965 &else_def_stmt, &def, &dt))
2968 else if (TREE_CODE (else_clause) != INTEGER_CST
2969 && TREE_CODE (else_clause) != REAL_CST)
/* Analysis phase: record the stmt type and ask the target whether it
   can expand a VEC_COND_EXPR in this mode.  */
2973 vec_mode = TYPE_MODE (vectype);
2977 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2978 return expand_vec_cond_expr_p (op, vec_mode);
2984 scalar_dest = TREE_OPERAND (stmt, 0);
2985 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2987 /* Handle cond expr. */
2989 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2991 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2992 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2993 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2995 /* Arguments are ready. create the new vector stmt. */
2996 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2997 vec_cond_lhs, vec_cond_rhs);
2998 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
2999 vec_compare, vec_then_clause, vec_else_clause);
3001 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
3002 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3003 TREE_OPERAND (*vec_stmt, 0) = new_temp;
3004 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3009 /* Function vect_transform_stmt.
3011 Create a vectorized stmt to replace STMT, and insert it at BSI. */
3014 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
3016 bool is_store = false;
3017 tree vec_stmt = NULL_TREE;
3018 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3019 tree orig_stmt_in_pattern;
/* Phase 1: relevant stmts -- dispatch to the vectorizable_* routine
   recorded during analysis in STMT_VINFO_TYPE.  */
3022 if (STMT_VINFO_RELEVANT_P (stmt_info))
3024 switch (STMT_VINFO_TYPE (stmt_info))
3026 case type_demotion_vec_info_type:
3027 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
3031 case type_promotion_vec_info_type:
3032 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
3036 case op_vec_info_type:
3037 done = vectorizable_operation (stmt, bsi, &vec_stmt);
3041 case assignment_vec_info_type:
3042 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
3046 case load_vec_info_type:
3047 done = vectorizable_load (stmt, bsi, &vec_stmt);
3051 case store_vec_info_type:
3052 done = vectorizable_store (stmt, bsi, &vec_stmt);
3057 case condition_vec_info_type:
3058 done = vectorizable_condition (stmt, bsi, &vec_stmt);
3063 if (vect_print_dump_info (REPORT_DETAILS))
3064 fprintf (vect_dump, "stmt not supported.");
3068 gcc_assert (vec_stmt);
3069 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
3070 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
3071 if (orig_stmt_in_pattern)
3073 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
3074 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3076 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
3078 /* STMT was inserted by the vectorizer to replace a computation
3079 idiom. ORIG_STMT_IN_PATTERN is a stmt in the original
3080 sequence that computed this idiom. We need to record a pointer
3081 to VEC_STMT in the stmt_info of ORIG_STMT_IN_PATTERN. See more
3082 detail in the documentation of vect_pattern_recog. */
3084 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
/* Phase 2: stmts whose value is live after the loop (reductions, or
   values used only outside the loop).  */
3089 if (STMT_VINFO_LIVE_P (stmt_info))
3091 switch (STMT_VINFO_TYPE (stmt_info))
3093 case reduc_vec_info_type:
3094 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
3099 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
3108 /* This function builds ni_name = number of iterations loop executes
3109 on the loop preheader. */
3112 vect_build_loop_niters (loop_vec_info loop_vinfo)
3114 tree ni_name, stmt, var;
3116 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3117 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
/* Gimplify the (possibly compound) niters expression into a temporary;
   any generated stmt is inserted on the preheader edge.  */
3119 var = create_tmp_var (TREE_TYPE (ni), "niters");
3120 add_referenced_var (var);
3121 ni_name = force_gimple_operand (ni, &stmt, false, var);
3123 pe = loop_preheader_edge (loop);
3126 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3127 gcc_assert (!new_bb);
3134 /* This function generates the following statements:
3136 ni_name = number of iterations loop executes
3137 ratio = ni_name / vf
3138 ratio_mult_vf_name = ratio * vf
3140 and places them at the loop preheader edge. */
3143 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3145 tree *ratio_mult_vf_name_ptr,
3146 tree *ratio_name_ptr)
3154 tree ratio_mult_vf_name;
3155 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3156 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3157 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3160 pe = loop_preheader_edge (loop);
3162 /* Generate temporary variable that contains
3163 number of iterations loop executes. */
3165 ni_name = vect_build_loop_niters (loop_vinfo);
/* VF is a power of two, so division/multiplication by VF is done with
   shifts by log2(vf).  */
3166 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
3168 /* Create: ratio = ni >> log2(vf) */
3170 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
3171 if (!is_gimple_val (ratio_name))
3173 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3174 add_referenced_var (var);
3176 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
3177 pe = loop_preheader_edge (loop);
3178 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3179 gcc_assert (!new_bb);
3182 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3184 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
3185 ratio_name, log_vf);
3186 if (!is_gimple_val (ratio_mult_vf_name))
3188 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3189 add_referenced_var (var);
3191 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
3193 pe = loop_preheader_edge (loop);
3194 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3195 gcc_assert (!new_bb);
/* Return all three names to the caller through the out-parameters.  */
3198 *ni_name_ptr = ni_name;
3199 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3200 *ratio_name_ptr = ratio_name;
3206 /* Function update_vuses_to_preheader.
3209 STMT - a statement with potential VUSEs.
3210 LOOP - the loop whose preheader will contain STMT.
3212 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
3213 appears to be defined in a V_MAY_DEF in another statement in a loop.
3214 One such case is when the VUSE is at the dereference of a __restricted__
3215 pointer in a load and the V_MAY_DEF is at the dereference of a different
3216 __restricted__ pointer in a store. Vectorization may result in
3217 copy_virtual_uses being called to copy the problematic VUSE to a new
3218 statement that is being inserted in the loop preheader. This procedure
3219 is called to change the SSA_NAME in the new statement's VUSE from the
3220 SSA_NAME updated in the loop to the related SSA_NAME available on the
3221 path entering the loop.
3223 When this function is called, we have the following situation:
3228 # name1 = phi < name0 , name2>
3233 # name2 = vdef <name1>
3238 Stmt S1 was created in the loop preheader block as part of misaligned-load
3239 handling. This function fixes the name of the vuse of S1 from 'name1' to
3243 update_vuses_to_preheader (tree stmt, struct loop *loop)
3245 basic_block header_bb = loop->header;
3246 edge preheader_e = loop_preheader_edge (loop);
3248 use_operand_p use_p;
3250 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
3252 tree ssa_name = USE_FROM_PTR (use_p);
3253 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
3254 tree name_var = SSA_NAME_VAR (ssa_name);
3255 basic_block bb = bb_for_stmt (def_stmt);
3257 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
3258 if (!IS_EMPTY_STMT (def_stmt)
3259 && flow_bb_inside_loop_p (loop, bb))
3261 /* If the block containing the statement defining the SSA_NAME
3262 is in the loop then it's necessary to find the definition
3263 outside the loop using the PHI nodes of the header. */
3265 bool updated = false;
/* Scan header phis for the one whose result belongs to the same base
   variable; replace the use with that phi's preheader-edge argument.  */
3267 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
3269 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
3271 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
/* A matching header phi must exist for every in-loop virtual def.  */
3276 gcc_assert (updated);
3282 /* Function vect_update_ivs_after_vectorizer.
3284 "Advance" the induction variables of LOOP to the value they should take
3285 after the execution of LOOP. This is currently necessary because the
3286 vectorizer does not handle induction variables that are used after the
3287 loop. Such a situation occurs when the last iterations of LOOP are
3289 1. We introduced new uses after LOOP for IVs that were not originally used
3290 after LOOP: the IVs of LOOP are now used by an epilog loop.
3291 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3292 times, whereas the loop IVs should be bumped N times.
3295 - LOOP - a loop that is going to be vectorized. The last few iterations
3296 of LOOP were peeled.
3297 - NITERS - the number of iterations that LOOP executes (before it is
3298 vectorized). i.e, the number of times the ivs should be bumped.
3299 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3300 coming out from LOOP on which there are uses of the LOOP ivs
3301 (this is the path from LOOP->exit to epilog_loop->preheader).
3303 The new definitions of the ivs are placed in LOOP->exit.
3304 The phi args associated with the edge UPDATE_E in the bb
3305 UPDATE_E->dest are updated accordingly.
3307 Assumption 1: Like the rest of the vectorizer, this function assumes
3308 a single loop exit that has a single predecessor.
3310 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3311 organized in the same order.
3313 Assumption 3: The access function of the ivs is simple enough (see
3314 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3316 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3317 coming out of LOOP on which the ivs of LOOP are used (this is the path
3318 that leads to the epilog loop; other paths skip the epilog loop). This
3319 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3320 needs to have its phis updated.
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = loop->single_exit->dest;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header phis and the update_bb phis in lockstep; per
     Assumption 2 above they are organized in the same order.  */
  for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
       phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
      tree access_fn = NULL;
      tree evolution_part;
      tree var, stmt, ni, ni_name;
      block_stmt_iterator last_bsi;

      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
          print_generic_expr (vect_dump, phi, TDF_SLIM);

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "reduc phi. skip.");

      /* Analyze the scalar evolution of the phi to obtain its step per
	 iteration of LOOP.  */
      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
	unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,

      /* ni = init + niters * step: the value the IV reaches after NITERS
	 scalar iterations.  */
      ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
			fold_build2 (MULT_EXPR, TREE_TYPE (niters),
				     niters, step_expr), init_expr);

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      /* Gimplify NI and insert the resulting statements into exit_bb.  */
      ni_name = force_gimple_operand (ni, &stmt, false, var);

      /* Insert stmt into exit_bb.  */
      last_bsi = bsi_last (exit_bb);
      bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
3404 /* Function vect_do_peeling_for_loop_bound
3406 Peel the last iterations of the loop represented by LOOP_VINFO.
3407 The peeled iterations form a new epilog loop. Given that the loop now
3408 iterates NITERS times, the new epilog loop iterates
3409 NITERS % VECTORIZATION_FACTOR times.
3411 The original loop will later be made to iterate
3412 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
				struct loops *loops)
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  basic_block preheader;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iterations the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
				   &ratio_mult_vf_name, ratio);

  /* Remember the loop number so we can assert that peeling left the
     loop structure we are transforming in place.  */
  loop_num  = loop->num;
  new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
					    ratio_mult_vf_name, ni_name, false);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */
  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
    update_e = EDGE_PRED (preheader, 0);
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */

  free_original_copy_tables ();
3470 /* Function vect_gen_niters_for_prolog_loop
3472 Set the number of iterations for the loop represented by LOOP_VINFO
3473 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3474 and the misalignment of DR - the data reference recorded in
3475 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3476 this loop, the data reference DR will refer to an aligned location.
3478 The following computation is generated:
3480 If the misalignment of DR is known at compile time:
3481 addr_mis = int mis = DR_MISALIGNMENT (dr);
3482 Else, compute address misalignment in bytes:
3483 addr_mis = addr & (vectype_size - 1)
3485 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3487 (elem_size = element type size; an element is the scalar element
3488 whose type is the inner type of the vectype) */
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree iters, iters_name;
  tree dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Alignment of the vector type in bytes.  */
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
      /* The misalignment of DR is known at compile time; the prolog
	 iteration count folds to a constant.  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "known alignment = %d.", byte_misalign);
      iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
      /* Misalignment unknown at compile time; emit runtime code that
	 computes it from the address of the first vector access.  */
      tree new_stmts = NULL_TREE;
	vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      /* Pointer-sized unsigned integer type for the address arithmetic.  */
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      tree elem_size_log =
        build_int_cst (type, exact_log2 (vectype_align/vf));
      tree vf_minus_1 = build_int_cst (type, vf - 1);
      tree vf_tree = build_int_cst (type, vf);

      /* The address computation may itself need statements; emit them on
	 the preheader edge (must not create a new basic block).  */
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
	fold_build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
	fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (VF - elem_misalign)&(VF - 1)  */
      iters = fold_build2 (MINUS_EXPR, type, vf_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, vf_minus_1);
      iters = fold_convert (niters_type, iters);

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  iters_name = force_gimple_operand (iters, &stmt, false, var);

  /* Insert stmt on loop preheader edge.  */
      basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
      gcc_assert (!new_bb);
3579 /* Function vect_update_init_of_dr
3581 NITERS iterations were peeled from LOOP. DR represents a data reference
3582 in LOOP. This function updates the information recorded in DR to
3583 account for the fact that the first NITERS iterations had already been
3584 executed. Specifically, it updates the OFFSET field of DR. */
vect_update_init_of_dr (struct data_reference *dr, tree niters)
  tree offset = DR_OFFSET (dr);

  /* The NITERS peeled iterations advanced the reference by
     NITERS * DR_STEP; fold that amount into the recorded offset so the
     access functions describe the remaining iterations.  */
  niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
  offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
  DR_OFFSET (dr) = offset;
3597 /* Function vect_update_inits_of_drs
3599 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3600 This function updates the information recorded for the data references in
3601 the loop to account for the fact that the first NITERS iterations had
3602 already been executed. Specifically, it updates the initial_condition of the
3603 access_function of all the data_references in the loop. */
3606 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3609 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3610 struct data_reference *dr;
3612 if (vect_dump && (dump_flags & TDF_DETAILS))
3613 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
3615 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
3616 vect_update_init_of_dr (dr, niters);
3620 /* Function vect_do_peeling_for_alignment
3622 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3623 'niters' is set to the misalignment of one of the data references in the
3624 loop, thereby forcing it to refer to an aligned location at the beginning
3625 of the execution of this loop. The data reference for which we are
3626 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  struct loop *new_loop;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  initialize_original_copy_tables ();

  /* Build the symbolic trip count of LOOP and, from it, the number of
     prolog iterations needed to reach an aligned access.  */
  ni_name = vect_build_loop_niters (loop_vinfo);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
    slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
				   niters_of_prolog_loop, ni_name, true);
  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);

  /* Update number of times loop executes.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
		TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */

  free_original_copy_tables ();
3668 /* Function vect_create_cond_for_align_checks.
   Create a conditional expression that represents the alignment checks for
   all of the data references (array element references) whose alignment must be
3675 LOOP_VINFO - two fields of the loop information are used.
3676 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
3677 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
3680 COND_EXPR_STMT_LIST - statements needed to construct the conditional
3682 The returned value is the conditional expression to be used in the if
3683 statement that controls which version of the loop gets executed at runtime.
3685 The algorithm makes two assumptions:
3686 1) The number of bytes "n" in a vector is a power of 2.
3687 2) An address "a" is aligned if a%n is zero and that this
3688 test can be done as a&(n-1) == 0. For example, for 16
3689 byte vectors the test is a&0xf == 0. */
vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
                                   tree *cond_expr_stmt_list)
  VEC(tree,heap) *may_misalign_stmts
    = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
  int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
  tree int_ptrsize_type;
  /* Running OR of all address temporaries; NULL until the first ref.  */
  tree or_tmp_name = NULL_TREE;
  tree and_tmp, and_tmp_name, and_stmt;

  /* Check that mask is one less than a power of 2, i.e., mask is
     all zeros followed by all ones.  */
  gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));

  /* CHECKME: what is the best integer or unsigned type to use to hold a
     cast from a pointer value?  */
  psize = TYPE_SIZE (ptr_type_node);
    = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);

  /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
     of the first vector of the i'th data reference.  */
  for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
      tree new_stmt_list = NULL_TREE;
      tree addr_tmp, addr_tmp_name, addr_stmt;
      tree or_tmp, new_or_tmp_name, or_stmt;

      /* create: addr_tmp = (int)(address_of_first_vector) */
      addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
      /* Statements needed to compute the address go into the versioning
	 condition's statement list.  */
      if (new_stmt_list != NULL_TREE)
        append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);

      sprintf (tmp_name, "%s%d", "addr2int", i);
      addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
      add_referenced_var (addr_tmp);
      addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
      addr_stmt = fold_convert (int_ptrsize_type, addr_base);
      addr_stmt = build2 (MODIFY_EXPR, void_type_node,
                          addr_tmp_name, addr_stmt);
      SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
      append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);

      /* The addresses are ORed together.  */
      if (or_tmp_name != NULL_TREE)
          /* create: or_tmp = or_tmp | addr_tmp */
          sprintf (tmp_name, "%s%d", "orptrs", i);
          or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
          add_referenced_var (or_tmp);
          new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
          or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
                            build2 (BIT_IOR_EXPR, int_ptrsize_type,
          SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
          append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
          or_tmp_name = new_or_tmp_name;
	/* First data reference: seed the running OR.  */
        or_tmp_name = addr_tmp_name;

  mask_cst = build_int_cst (int_ptrsize_type, mask);

  /* create: and_tmp = or_tmp & mask */
  and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
  add_referenced_var (and_tmp);
  and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);

  and_stmt = build2 (MODIFY_EXPR, void_type_node,
                     build2 (BIT_AND_EXPR, int_ptrsize_type,
                             or_tmp_name, mask_cst));
  SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
  append_to_statement_list_force (and_stmt, cond_expr_stmt_list);

  /* Make and_tmp the left operand of the conditional test against zero.
     if and_tmp has a nonzero bit then some address is unaligned.  */
  ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
  return build2 (EQ_EXPR, boolean_type_node,
                 and_tmp_name, ptrsize_zero);
3790 /* Function vect_transform_loop.
3792 The analysis phase has determined that the loop is vectorizable.
3793 Vectorize the loop - created vectorized stmts to replace the scalar
3794 stmts in the loop, and update the loop exit condition. */
3797 vect_transform_loop (loop_vec_info loop_vinfo,
3798 struct loops *loops ATTRIBUTE_UNUSED)
3800 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3801 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3802 int nbbs = loop->num_nodes;
3803 block_stmt_iterator si;
3806 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3810 if (vect_print_dump_info (REPORT_DETAILS))
3811 fprintf (vect_dump, "=== vec_transform_loop ===");
3813 /* If the loop has data references that may or may not be aligned then
3814 two versions of the loop need to be generated, one which is vectorized
3815 and one which isn't. A test is then generated to control which of the
3816 loops is executed. The test checks for the alignment of all of the
3817 data references that may or may not be aligned. */
3819 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
3823 tree cond_expr_stmt_list = NULL_TREE;
3824 basic_block condition_bb;
3825 block_stmt_iterator cond_exp_bsi;
3826 basic_block merge_bb;
3827 basic_block new_exit_bb;
3829 tree orig_phi, new_phi, arg;
3831 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
3832 &cond_expr_stmt_list);
3833 initialize_original_copy_tables ();
3834 nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
3835 free_original_copy_tables();
3837 /** Loop versioning violates an assumption we try to maintain during
3838 vectorization - that the loop exit block has a single predecessor.
3839 After versioning, the exit block of both loop versions is the same
3840 basic block (i.e. it has two predecessors). Just in order to simplify
3841 following transformations in the vectorizer, we fix this situation
3842 here by adding a new (empty) block on the exit-edge of the loop,
3843 with the proper loop-exit phis to maintain loop-closed-form. **/
3845 merge_bb = loop->single_exit->dest;
3846 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
3847 new_exit_bb = split_edge (loop->single_exit);
3848 new_exit_e = loop->single_exit;
3849 e = EDGE_SUCC (new_exit_bb, 0);
3851 for (orig_phi = phi_nodes (merge_bb); orig_phi;
3852 orig_phi = PHI_CHAIN (orig_phi))
3854 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
3856 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
3857 add_phi_arg (new_phi, arg, new_exit_e);
3858 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
3861 /** end loop-exit-fixes after versioning **/
3863 update_ssa (TODO_update_ssa);
3864 cond_exp_bsi = bsi_last (condition_bb);
3865 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
3868 /* CHECKME: we wouldn't need this if we called update_ssa once
3870 bitmap_zero (vect_vnames_to_rename);
3872 /* Peel the loop if there are data refs with unknown alignment.
3873 Only one data ref with unknown store is allowed. */
3875 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
3876 vect_do_peeling_for_alignment (loop_vinfo, loops);
3878 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3879 compile time constant), or it is a constant that doesn't divide by the
3880 vectorization factor, then an epilog loop needs to be created.
3881 We therefore duplicate the loop: the original loop will be vectorized,
3882 and will compute the first (n/VF) iterations. The second copy of the loop
3883 will remain scalar and will compute the remaining (n%VF) iterations.
3884 (VF is the vectorization factor). */
3886 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3887 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3888 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3889 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3891 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3892 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3894 /* 1) Make sure the loop header has exactly two entries
3895 2) Make sure we have a preheader basic block. */
3897 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3899 split_edge (loop_preheader_edge (loop));
  /* FORNOW: the vectorizer supports only loops whose body consists
     of one basic block (header + empty latch).  When the vectorizer
     supports more involved loop forms, the order in which the BBs are
     traversed will need to be reconsidered.  */
3906 for (i = 0; i < nbbs; i++)
3908 basic_block bb = bbs[i];
3910 for (si = bsi_start (bb); !bsi_end_p (si);)
3912 tree stmt = bsi_stmt (si);
3913 stmt_vec_info stmt_info;
3916 if (vect_print_dump_info (REPORT_DETAILS))
3918 fprintf (vect_dump, "------>vectorizing statement: ");
3919 print_generic_expr (vect_dump, stmt, TDF_SLIM);
3921 stmt_info = vinfo_for_stmt (stmt);
3922 gcc_assert (stmt_info);
3923 if (!STMT_VINFO_RELEVANT_P (stmt_info)
3924 && !STMT_VINFO_LIVE_P (stmt_info))
3930 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
3931 != (unsigned HOST_WIDE_INT) vectorization_factor)
3932 && vect_print_dump_info (REPORT_DETAILS))
3933 fprintf (vect_dump, "multiple-types.");
3935 /* -------- vectorize statement ------------ */
3936 if (vect_print_dump_info (REPORT_DETAILS))
3937 fprintf (vect_dump, "transform statement.");
3939 is_store = vect_transform_stmt (stmt, &si);
3942 /* Free the attached stmt_vec_info and remove the stmt. */
3943 stmt_ann_t ann = stmt_ann (stmt);
3945 set_stmt_info (ann, NULL);
3946 bsi_remove (&si, true);
3954 slpeel_make_loop_iterate_ntimes (loop, ratio);
3956 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
3957 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
3959 /* The memory tags and pointers in vectorized statements need to
3960 have their SSA forms updated. FIXME, why can't this be delayed
3961 until all the loops have been transformed? */
3962 update_ssa (TODO_update_ssa);
3964 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
3965 fprintf (vect_dump, "LOOP VECTORIZED.");