1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003,2004,2005 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *);
50 static void vect_align_data_ref (tree);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53 (tree, block_stmt_iterator *, tree, tree *, bool);
54 static tree vect_create_index_for_vector_ref (loop_vec_info);
55 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
56 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
57 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
58 static tree vect_init_vector (tree, tree);
59 static void vect_finish_stmt_generation
60 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 static bool vect_is_simple_cond (tree, loop_vec_info);
62 static void update_vuses_to_preheader (tree, struct loop*);
63 static tree get_initial_def_for_reduction (tree, tree, tree *);
65 /* Utility function dealing with loop peeling (not peeling itself). */
66 static void vect_generate_tmps_on_preheader
67 (loop_vec_info, tree *, tree *, tree *);
68 static tree vect_build_loop_niters (loop_vec_info);
69 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
70 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
71 static void vect_update_init_of_dr (struct data_reference *, tree niters);
72 static void vect_update_inits_of_drs (loop_vec_info, tree);
73 static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
74 static void vect_do_peeling_for_loop_bound
75 (loop_vec_info, tree *, struct loops *);
76 static int vect_min_worthwhile_factor (enum tree_code);
79 /* Function vect_get_new_vect_var.
81 Returns a name for a new variable. The current naming scheme appends the
82 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
83 the name of vectorizer generated variables, and appends that to NAME if
/* Create and return a fresh vectorizer temporary of TYPE.  Per the comment
   above, the name is a "vect_"/"vect_p" prefix (chosen from VAR_KIND) with
   NAME appended when the caller supplies one.
   NOTE(review): this listing has elided lines — the switch over VAR_KIND,
   the prefix assignments, and the return are not visible here.  */
87 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
100 case vect_pointer_var:
/* Caller supplied a NAME: append it to the prefix.  */
108 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
/* No NAME: use the bare prefix.  */
110 new_vect_var = create_tmp_var (type, prefix);
116 /* Function vect_create_index_for_vector_ref.
118 Create (and return) an index variable, along with it's update chain in the
119 loop. This variable will be used to access a memory location in a vector
123 LOOP: The loop being vectorized.
124 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
125 function can be added here, or in the loop pre-header.
128 Return an index that will be used to index a vector array. It is expected
129 that a pointer to the first vector will be used as the base address for the
132 FORNOW: we are not trying to be efficient, just creating a new index each
133 time from scratch. At this time all vector references could use the same
136 TODO: create only one index to be used by all vector references. Record
137 the index in the LOOP_VINFO the first time this procedure is called and
138 return it on subsequent calls. The increment of this index must be placed
139 just before the conditional expression that ends the single block loop. */
/* Build a {0, +, 1} induction variable in LOOP_VINFO's loop and return the
   SSA name valid before the increment; used to index vector accesses whose
   base pointer already addresses the first vector (see comment above).
   NOTE(review): this listing has elided lines (declarations of init, step,
   incr, insert_after and the braces are not visible).  */
142 vect_create_index_for_vector_ref (loop_vec_info loop_vinfo)
145 block_stmt_iterator incr_bsi;
147 tree indx_before_incr, indx_after_incr;
148 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
151 /* It is assumed that the base pointer used for vectorized access contains
152 the address of the first vector. Therefore the index used for vectorized
153 access must be initialized to zero and incremented by 1. */
155 init = integer_zero_node;
156 step = integer_one_node;
/* Place the increment at the loop's standard IV position and build the IV.  */
158 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
159 create_iv (init, step, NULL_TREE, loop, &incr_bsi, insert_after,
160 &indx_before_incr, &indx_after_incr);
/* Attach vectorizer stmt-info to the new increment statement.  */
161 incr = bsi_stmt (incr_bsi);
162 set_stmt_info ((tree_ann_t)stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
164 return indx_before_incr;
168 /* Function vect_create_addr_base_for_vector_ref.
170 Create an expression that computes the address of the first memory location
171 that will be accessed for a data reference.
174 STMT: The statement containing the data reference.
175 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
176 OFFSET: Optional. If supplied, it is be added to the initial address.
179 1. Return an SSA_NAME whose value is the address of the memory location of
180 the first vector of the data reference.
181 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
182 these statement(s) which define the returned SSA_NAME.
184 FORNOW: We are only handling array accesses with step 1. */
/* Compute base + DR_OFFSET + DR_INIT (+ optional OFFSET scaled by the step —
   see the elided multiplication operand) for STMT's data reference, gimplify
   it onto NEW_STMT_LIST, and return an SSA name holding the address.
   NOTE(review): this listing has elided lines (the OFFSET parameter line,
   several declarations, the 'if (offset)' guard, and the return stmt are
   not visible here).  */
187 vect_create_addr_base_for_vector_ref (tree stmt,
191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
192 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
193 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
194 tree base_name = build_fold_indirect_ref (data_ref_base);
195 tree ref = DR_REF (dr);
196 tree scalar_type = TREE_TYPE (ref);
197 tree scalar_ptr_type = build_pointer_type (scalar_type);
200 tree addr_base, addr_expr;
202 tree base_offset = unshare_expr (DR_OFFSET (dr));
203 tree init = unshare_expr (DR_INIT (dr));
205 /* Create base_offset */
206 base_offset = size_binop (PLUS_EXPR, base_offset, init);
207 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
208 add_referenced_tmp_var (dest);
/* Gimplify the folded offset; any helper stmts go on NEW_STMT_LIST.  */
209 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
210 append_to_statement_list_force (new_stmt, new_stmt_list);
/* Fold the caller-supplied OFFSET (scaled — second MULT operand elided)
   into base_offset.  Presumably guarded by 'if (offset)' on an elided
   line — TODO confirm.  */
214 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
215 add_referenced_tmp_var (tmp);
216 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset,
218 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
219 base_offset, offset);
220 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
221 append_to_statement_list_force (new_stmt, new_stmt_list);
224 /* base + base_offset */
225 addr_base = fold_build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
228 /* addr_expr = addr_base */
229 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
230 get_name (base_name));
231 add_referenced_tmp_var (addr_expr)_;
232 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
233 new_temp = make_ssa_name (addr_expr, vec_stmt);
234 TREE_OPERAND (vec_stmt, 0) = new_temp;
235 append_to_statement_list_force (vec_stmt, new_stmt_list);
237 if (vect_print_dump_info (REPORT_DETAILS))
239 fprintf (vect_dump, "created ");
240 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
246 /* Function vect_align_data_ref.
248 Handle misalignment of a memory accesses.
250 FORNOW: Can't handle misaligned accesses.
251 Make sure that the dataref is aligned. */
/* Assert that STMT's data reference is aligned; misaligned accesses are not
   handled yet (FORNOW), so this is purely a sanity check with no codegen.  */
254 vect_align_data_ref (tree stmt)
256 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
257 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
259 /* FORNOW: can't handle misaligned accesses;
260 all accesses expected to be aligned. */
261 gcc_assert (aligned_access_p (dr));
265 /* Function vect_create_data_ref_ptr.
267 Create a memory reference expression for vector access, to be used in a
268 vector load/store stmt. The reference is based on a new pointer to vector
272 1. STMT: a stmt that references memory. Expected to be of the form
273 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
274 2. BSI: block_stmt_iterator where new stmts can be added.
275 3. OFFSET (optional): an offset to be added to the initial address accessed
276 by the data-ref in STMT.
277 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
278 pointing to the initial address.
281 1. Declare a new ptr to vector_type, and have it point to the base of the
282 data reference (initial addressed accessed by the data reference).
283 For example, for vector of type V8HI, the following code is generated:
286 vp = (v8hi *)initial_address;
288 if OFFSET is not supplied:
289 initial_address = &a[init];
290 if OFFSET is supplied:
291 initial_address = &a[init + OFFSET];
293 Return the initial_address in INITIAL_ADDRESS.
295 2. Create a data-reference in the loop based on the new vector pointer vp,
296 and using a new index variable 'idx' as follows:
300 where if ONLY_INIT is true:
303 update = idx + vector_type_size
305 Return the pointer vp'.
308 FORNOW: handle only aligned and consecutive accesses. */
/* Create a vector pointer for STMT's data reference: (1) declare the
   pointer variable, (2) give it aliasing info from the DR, (3) initialize
   it to the first address in the loop preheader, and (4) unless ONLY_INIT,
   emit idx * vectype_size and add it to the initial pointer at BSI.
   Returns the initial pointer when ONLY_INIT, else the updated pointer.
   NOTE(review): this listing has elided lines (many declarations, braces,
   dump-block delimiters, and the final return are not visible here).  */
311 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
312 tree *initial_address, bool only_init)
315 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
316 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
317 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
318 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
324 tree new_stmt_list = NULL_TREE;
326 edge pe = loop_preheader_edge (loop);
332 tree type, tmp, size;
333 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
335 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
/* Dump what kind of base we are vectorizing, classified by tree code.  */
337 if (vect_print_dump_info (REPORT_DETAILS))
339 tree data_ref_base = base_name;
340 fprintf (vect_dump, "create vector-pointer variable to type: ");
341 print_generic_expr (vect_dump, vectype, TDF_SLIM);
342 if (TREE_CODE (data_ref_base) == VAR_DECL)
343 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
344 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
345 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
346 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
347 fprintf (vect_dump, " vectorizing a record based array ref: ");
348 else if (TREE_CODE (data_ref_base) == SSA_NAME)
349 fprintf (vect_dump, " vectorizing a pointer ref: ");
350 print_generic_expr (vect_dump, base_name, TDF_SLIM);
353 /** (1) Create the new vector-pointer variable: **/
355 vect_ptr_type = build_pointer_type (vectype);
356 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
357 get_name (base_name));
358 add_referenced_tmp_var (vect_ptr);
361 /** (2) Add aliasing information to the new vector-pointer:
362 (The points-to info (DR_PTR_INFO) may be defined later.) **/
364 tag = DR_MEMTAG (dr);
367 /* If tag is a variable (and NOT_A_TAG) than a new type alias
368 tag must be created with tag added to its may alias list. */
369 if (var_ann (tag)->mem_tag_kind == NOT_A_TAG)
370 new_type_alias (vect_ptr, tag);
372 var_ann (vect_ptr)->type_mem_tag = tag;
374 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
376 /** (3) Calculate the initial address the vector-pointer, and set
377 the vector-pointer to point to it before the loop: **/
379 /* Create: (&(base[init_val+offset]) in the loop preheader. */
380 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
/* Insert the address computation on the preheader edge; no new block is
   expected to be created.  */
382 pe = loop_preheader_edge (loop);
383 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
384 gcc_assert (!new_bb);
385 *initial_address = new_temp;
387 /* Create: p = (vectype *) initial_base */
388 vec_stmt = fold_convert (vect_ptr_type, new_temp);
389 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
390 new_temp = make_ssa_name (vect_ptr, vec_stmt);
391 TREE_OPERAND (vec_stmt, 0) = new_temp;
392 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
393 gcc_assert (!new_bb);
394 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
397 /** (4) Handle the updating of the vector-pointer inside the loop: **/
399 if (only_init) /* No update in loop is required. */
401 /* Copy the points-to information if it exists. */
402 if (DR_PTR_INFO (dr))
403 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
404 return vect_ptr_init;
407 idx = vect_create_index_for_vector_ref (loop_vinfo);
409 /* Create: update = idx * vectype_size */
410 tmp = create_tmp_var (integer_type_node, "update");
411 add_referenced_tmp_var (tmp);
/* Pick an integer type as wide as the pointer for the byte offset.  */
412 size = TYPE_SIZE (vect_ptr_type);
413 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
414 ptr_update = create_tmp_var (type, "update");
415 add_referenced_tmp_var (ptr_update);
416 vectype_size = TYPE_SIZE_UNIT (vectype);
417 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
418 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
419 new_temp = make_ssa_name (tmp, vec_stmt);
420 TREE_OPERAND (vec_stmt, 0) = new_temp;
421 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
/* Widen the product to the pointer-sized type.  */
422 vec_stmt = fold_convert (type, new_temp);
423 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
424 new_temp = make_ssa_name (ptr_update, vec_stmt);
425 TREE_OPERAND (vec_stmt, 0) = new_temp;
426 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
428 /* Create: data_ref_ptr = vect_ptr_init + update */
429 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
430 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
431 new_temp = make_ssa_name (vect_ptr, vec_stmt);
432 TREE_OPERAND (vec_stmt, 0) = new_temp;
433 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
434 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
436 /* Copy the points-to information if it exists. */
437 if (DR_PTR_INFO (dr))
438 duplicate_ssa_name_ptr_info (data_ref_ptr, DR_PTR_INFO (dr));
443 /* Function vect_create_destination_var.
445 Create a new temporary of type VECTYPE. */
/* Create a destination temporary named after SCALAR_DEST: of type VECTYPE
   when given, else of SCALAR_DEST's own (scalar) type.
   NOTE(review): KIND is computed from VECTYPE but the visible call passes
   vect_simple_var unconditionally — looks suspicious; an elided line may
   use KIND, or this may be a latent bug.  TODO confirm against full file.
   NOTE(review): declarations and the return are on elided lines.  */
448 vect_create_destination_var (tree scalar_dest, tree vectype)
451 const char *new_name;
453 enum vect_var_kind kind;
455 kind = vectype ? vect_simple_var : vect_scalar_var;
456 type = vectype ? vectype : TREE_TYPE (scalar_dest);
458 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
460 new_name = get_name (scalar_dest);
463 vec_dest = vect_get_new_vect_var (type, vect_simple_var, new_name);
464 add_referenced_tmp_var (vec_dest);
470 /* Function vect_init_vector.
472 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
473 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
474 used in the vectorization of STMT. */
/* Emit 'vect_cst_ = VECTOR_VAR' on the loop preheader edge of STMT's loop
   and return the new SSA def (per the comment above: the DEF of INIT_STMT).
   NOTE(review): declarations, braces, and the return stmt are on elided
   lines in this listing.  */
477 vect_init_vector (tree stmt, tree vector_var)
479 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
480 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
481 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
484 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
490 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
491 add_referenced_tmp_var (new_var);
493 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
494 new_temp = make_ssa_name (new_var, init_stmt);
495 TREE_OPERAND (init_stmt, 0) = new_temp;
/* Insert on the preheader edge; must not split the edge (no new block).  */
497 pe = loop_preheader_edge (loop);
498 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
499 gcc_assert (!new_bb);
501 if (vect_print_dump_info (REPORT_DETAILS))
503 fprintf (vect_dump, "created new init_stmt: ");
504 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
507 vec_oprnd = TREE_OPERAND (init_stmt, 0);
512 /* Function vect_get_vec_def_for_operand.
514 OP is an operand in STMT. This function returns a (vector) def that will be
515 used in the vectorized stmt for STMT.
517 In the case that OP is an SSA_NAME which is defined in the loop, then
518 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
520 In case OP is an invariant or constant, a new stmt that creates a vector def
521 needs to be introduced. */
/* Return the vector def to use for operand OP of STMT, dispatching on the
   def-type classified by vect_is_simple_use: constant/invariant operands
   get a splat built via vect_init_vector; loop defs reuse the def's
   vectorized stmt; reduction defs defer to get_initial_def_for_reduction;
   induction is unsupported (FORNOW).
   NOTE(review): this listing has elided lines — the switch head, several
   braces, case labels (e.g. the loop-def case), and returns are not all
   visible.  */
524 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
529 stmt_vec_info def_stmt_info = NULL;
530 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
531 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
532 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
533 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
534 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
540 enum vect_def_type dt;
543 if (vect_print_dump_info (REPORT_DETAILS))
545 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
546 print_generic_expr (vect_dump, op, TDF_SLIM);
/* Classify OP; it must be a simple use (checked during analysis).  */
549 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
550 gcc_assert (is_simple_use);
551 if (vect_print_dump_info (REPORT_DETAILS))
555 fprintf (vect_dump, "def = ");
556 print_generic_expr (vect_dump, def, TDF_SLIM);
560 fprintf (vect_dump, " def_stmt = ");
561 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
567 /* Case 1: operand is a constant. */
568 case vect_constant_def:
573 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
574 if (vect_print_dump_info (REPORT_DETAILS))
575 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
/* Build the element list back-to-front, splatting OP nunits times.  */
577 for (i = nunits - 1; i >= 0; --i)
579 t = tree_cons (NULL_TREE, op, t);
581 vec_cst = build_vector (vectype, t);
582 return vect_init_vector (stmt, vec_cst);
585 /* Case 2: operand is defined outside the loop - loop invariant. */
586 case vect_invariant_def:
591 /* Create 'vec_inv = {inv,inv,..,inv}' */
592 if (vect_print_dump_info (REPORT_DETAILS))
593 fprintf (vect_dump, "Create vector_inv.");
595 for (i = nunits - 1; i >= 0; --i)
597 t = tree_cons (NULL_TREE, def, t);
600 /* FIXME: use build_constructor directly. */
601 vec_inv = build_constructor_from_list (vectype, t);
602 return vect_init_vector (stmt, vec_inv);
605 /* Case 3: operand is defined inside the loop. */
609 *scalar_def = def_stmt;
611 /* Get the def from the vectorized stmt. */
612 def_stmt_info = vinfo_for_stmt (def_stmt);
613 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
614 gcc_assert (vec_stmt);
615 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
619 /* Case 4: operand is defined by a loop header phi - reduction */
620 case vect_reduction_def:
622 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
624 /* Get the def before the loop */
625 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
626 return get_initial_def_for_reduction (stmt, op, scalar_def);
629 /* Case 5: operand is defined by loop-header phi - induction. */
630 case vect_induction_def:
632 if (vect_print_dump_info (REPORT_DETAILS))
633 fprintf (vect_dump, "induction - unsupported.");
634 internal_error ("no support for induction"); /* FORNOW */
643 /* Function vect_finish_stmt_generation.
645 Insert a new stmt. */
/* Insert VEC_STMT just before STMT at BSI, dump it, and copy STMT's source
   location onto it (EXPR_LOCATION vs EXPR_LOCUS depending on the
   USE_MAPPED_LOCATION build configuration).
   NOTE(review): braces and the #else/#endif lines are elided here.  */
648 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
650 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
652 if (vect_print_dump_info (REPORT_DETAILS))
654 fprintf (vect_dump, "add new stmt: ");
655 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
658 /* Make sure bsi points to the stmt that is being vectorized. */
659 gcc_assert (stmt == bsi_stmt (*bsi));
661 #ifdef USE_MAPPED_LOCATION
662 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
664 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
669 #define ADJUST_IN_EPILOG 1
671 /* Function get_initial_def_for_reduction
674 STMT - a stmt that performs a reduction operation in the loop.
675 INIT_VAL - the initial value of the reduction variable
678 SCALAR_DEF - a tree that holds a value to be added to the final result
679 of the reduction (used for "ADJUST_IN_EPILOG" - see below).
680 Return a vector variable, initialized according to the operation that STMT
681 performs. This vector will be used as the initial value of the
682 vector of partial results.
684 Option1 ("ADJUST_IN_EPILOG"): Initialize the vector as follows:
687 min/max: [init_val,init_val,..,init_val,init_val]
688 bit and/or: [init_val,init_val,..,init_val,init_val]
689 and when necessary (e.g. add/mult case) let the caller know
690 that it needs to adjust the result by init_val.
692 Option2: Initialize the vector as follows:
693 add: [0,0,...,0,init_val]
694 mult: [1,1,...,1,init_val]
695 min/max: [init_val,init_val,...,init_val]
696 bit and/or: [init_val,init_val,...,init_val]
697 and no adjustments are needed.
699 For example, for the following code:
705 STMT is 's = s + a[i]', and the reduction variable is 's'.
706 For a vector of 4 units, we want to return either [0,0,0,init_val],
707 or [0,0,0,0] and let the caller know that it needs to adjust
708 the result at the end by 'init_val'.
710 FORNOW: We use the "ADJUST_IN_EPILOG" scheme.
711 TODO: Use some cost-model to estimate which scheme is more profitable.
/* Build the initial value vector for a reduction (see the big comment
   above): with ADJUST_IN_EPILOG defined, a zero vector is built and
   *SCALAR_DEF is set to INIT_VAL so the caller adjusts the final result;
   otherwise the last lane carries INIT_VAL directly.
   NOTE(review): this listing has elided lines — the switch over the
   reduction CODE (nelements assignments per case), #else/#endif, and
   several declarations are not visible.  */
715 get_initial_def_for_reduction (tree stmt, tree init_val, tree *scalar_def)
717 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
718 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
719 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
721 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
722 tree type = TREE_TYPE (init_val);
724 tree vec, t = NULL_TREE;
725 bool need_epilog_adjust;
728 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
/* Zero of the element type — integer 0 or real 0.0.  */
733 if (INTEGRAL_TYPE_P (type))
734 def = build_int_cst (type, 0);
736 def = build_real (type, dconst0);
738 #ifdef ADJUST_IN_EPILOG
739 /* All the 'nunits' elements are set to 0. The final result will be
740 adjusted by 'init_val' at the loop epilog. */
742 need_epilog_adjust = true;
744 /* 'nunits - 1' elements are set to 0; The last element is set to
745 'init_val'. No further adjustments at the epilog are needed. */
746 nelements = nunits - 1;
747 need_epilog_adjust = false;
755 need_epilog_adjust = false;
/* Build NELEMENTS copies of the neutral element, back-to-front.  */
762 for (i = nelements - 1; i >= 0; --i)
763 t = tree_cons (NULL_TREE, def, t);
765 if (nelements == nunits - 1)
767 /* Set the last element of the vector. */
768 t = tree_cons (NULL_TREE, init_val, t);
771 gcc_assert (nelements == nunits);
/* Constant initial values yield a VECTOR_CST; otherwise a constructor.  */
773 if (TREE_CODE (init_val) == INTEGER_CST || TREE_CODE (init_val) == REAL_CST)
774 vec = build_vector (vectype, t);
776 vec = build_constructor_from_list (vectype, t);
778 if (!need_epilog_adjust)
779 *scalar_def = NULL_TREE;
781 *scalar_def = init_val;
783 return vect_init_vector (stmt, vec);
787 /* Function vect_create_epilog_for_reduction:
789 Create code at the loop-epilog to finalize the result of a reduction
792 LOOP_EXIT_VECT_DEF is a vector of partial results. We need to "reduce" it
793 into a single result, by applying the operation REDUC_CODE on the
794 partial-results-vector. For this, we need to create a new phi node at the
795 loop exit to preserve loop-closed form, as illustrated below.
797 STMT is the original scalar reduction stmt that is being vectorized.
798 REDUCTION_OP is the scalar reduction-variable.
799 REDUCTION_PHI is the phi-node that carries the reduction computation.
800 This function also sets the arguments for the REDUCTION_PHI:
801 The loop-entry argument is the (vectorized) initial-value of REDUCTION_OP.
802 The loop-latch argument is VECT_DEF - the vector of partial sums.
804 This function transforms this:
807 vec_def = phi <null, null> # REDUCTION_PHI
812 s_out0 = phi <s_loop> # EXIT_PHI
820 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
825 s_out0 = phi <s_loop> # EXIT_PHI
826 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
828 v_out2 = reduc_expr <v_out1>
829 s_out3 = extract_field <v_out2, 0>
/* Finalize a vectorized reduction (see the transformation sketch above):
   (1) wire the reduction phi's preheader/latch args, (2) build epilog code
   that collapses the partial-results vector to a scalar — by a direct
   reduction tree code, by log2 whole-vector shifts, or by per-element
   scalar extraction — (3) extract the scalar lane, (4) fold in the initial
   value when needed, and (5) redirect the loop-closed exit phi's uses.
   NOTE(review): this listing has elided lines throughout — declarations,
   braces, several if/else heads, and some statement operands (e.g. the
   BIT_FIELD_REF position arguments) are not visible.  */
836 vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
837 enum tree_code reduc_code, tree reduction_phi)
839 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
840 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
841 enum machine_mode mode = TYPE_MODE (vectype);
842 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
843 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
845 tree scalar_dest = TREE_OPERAND (stmt, 0);
846 tree scalar_type = TREE_TYPE (scalar_dest);
848 block_stmt_iterator exit_bsi;
853 tree new_scalar_dest, exit_phi;
854 tree bitsize, bitpos, bytesize;
855 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
856 tree scalar_initial_def;
857 tree vec_initial_def;
859 imm_use_iterator imm_iter;
861 bool extract_scalar_result;
863 /*** 1. Create the reduction def-use cycle ***/
865 /* 1.1 set the loop-entry arg of the reduction-phi: */
866 /* For the case of reduction, vect_get_vec_def_for_operand returns
867 the scalar def before the loop, that defines the initial value
868 of the reduction variable. */
869 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
870 &scalar_initial_def);
871 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
874 /* 1.2 set the loop-latch arg for the reduction-phi: */
875 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
877 if (vect_print_dump_info (REPORT_DETAILS))
879 fprintf (vect_dump, "transform reduction: created def-use cycle:");
880 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
881 fprintf (vect_dump, "\n");
882 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
886 /*** 2. Create epilog code ***/
888 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
889 v_out1 = phi <v_loop> */
891 exit_bb = loop->single_exit->dest;
892 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
893 SET_PHI_ARG_DEF (new_phi, loop->single_exit->dest_idx, vect_def);
895 exit_bsi = bsi_start (exit_bb);
898 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
899 bitsize = TYPE_SIZE (scalar_type);
900 bytesize = TYPE_SIZE_UNIT (scalar_type);
902 /* 2.2 Create the reduction code. */
/* REDUC_CODE < NUM_TREE_CODES means the target supplied a real reduction
   tree code; otherwise fall through to shift-based or scalar reduction.  */
904 if (reduc_code < NUM_TREE_CODES)
907 v_out2 = reduc_expr <v_out1> */
909 if (vect_print_dump_info (REPORT_DETAILS))
910 fprintf (vect_dump, "Reduce using direct vector reduction.");
912 vec_dest = vect_create_destination_var (scalar_dest, vectype);
913 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
914 build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
915 new_temp = make_ssa_name (vec_dest, epilog_stmt);
916 TREE_OPERAND (epilog_stmt, 0) = new_temp;
917 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
919 extract_scalar_result = true;
923 enum tree_code shift_code = 0;
924 bool have_whole_vector_shift = true;
925 enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
927 int element_bitsize = tree_low_cst (bitsize, 1);
928 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
931 /* The result of the reduction is expected to be at the least
932 significant bits of the vector. This is merely convention,
933 as it's the extraction later that really matters, and that
934 is also under our control. */
935 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
936 shift_code = VEC_RSHIFT_EXPR;
938 have_whole_vector_shift = false;
940 /* Regardless of whether we have a whole vector shift, if we're
941 emulating the operation via tree-vect-generic, we don't want
942 to use it. Only the first round of the reduction is likely
943 to still be profitable via emulation. */
944 /* ??? It might be better to emit a reduction tree code here, so that
945 tree-vect-generic can expand the first round via bit tricks. */
946 if (!VECTOR_MODE_P (mode))
947 have_whole_vector_shift = false;
950 optab optab = optab_for_tree_code (code, vectype);
951 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
952 have_whole_vector_shift = false;
955 if (have_whole_vector_shift)
958 for (offset = VS/2; offset >= element_size; offset/=2)
960 Create: va' = vec_shift <va, offset>
961 Create: va = vop <va, va'>
964 if (vect_print_dump_info (REPORT_DETAILS))
965 fprintf (vect_dump, "Reduce using vector shifts");
967 vec_dest = vect_create_destination_var (scalar_dest, vectype);
968 new_temp = PHI_RESULT (new_phi);
/* Halve the active width each iteration: shift then combine.  */
970 for (bit_offset = vec_size_in_bits/2;
971 bit_offset >= element_bitsize;
974 tree bitpos = size_int (bit_offset);
976 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
977 build2 (shift_code, vectype, new_temp, bitpos));
978 new_name = make_ssa_name (vec_dest, epilog_stmt);
979 TREE_OPERAND (epilog_stmt, 0) = new_name;
980 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
981 if (vect_print_dump_info (REPORT_DETAILS))
982 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
985 epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
986 build2 (code, vectype, new_name, new_temp));
987 new_temp = make_ssa_name (vec_dest, epilog_stmt);
988 TREE_OPERAND (epilog_stmt, 0) = new_temp;
989 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
990 if (vect_print_dump_info (REPORT_DETAILS))
991 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
994 extract_scalar_result = true;
/* Fallback: extract each lane with BIT_FIELD_REF and fold scalar-wise.  */
1002 s = extract_field <v_out2, 0>
1003 for (offset=element_size; offset<vector_size; offset+=element_size;)
1005 Create: s' = extract_field <v_out2, offset>
1006 Create: s = op <s, s'>
1009 if (vect_print_dump_info (REPORT_DETAILS))
1010 fprintf (vect_dump, "Reduce using scalar code. ");
1012 vec_temp = PHI_RESULT (new_phi);
1013 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1015 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1018 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1019 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1021 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1022 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1023 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1024 if (vect_print_dump_info (REPORT_DETAILS))
1025 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1027 for (bit_offset = element_bitsize;
1028 bit_offset < vec_size_in_bits;
1029 bit_offset += element_bitsize)
1031 tree bitpos = bitsize_int (bit_offset);
1032 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1035 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1036 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1038 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1039 TREE_OPERAND (epilog_stmt, 0) = new_name;
1040 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1041 if (vect_print_dump_info (REPORT_DETAILS))
1042 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1045 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1046 build2 (code, scalar_type, new_name, new_temp));
1047 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1048 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1049 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1050 if (vect_print_dump_info (REPORT_DETAILS))
1051 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1054 extract_scalar_result = false;
1059 /* 2.3 Extract the final scalar result. Create:
1060 s_out3 = extract_field <v_out2, bitpos> */
1062 if (extract_scalar_result)
1066 if (vect_print_dump_info (REPORT_DETAILS))
1067 fprintf (vect_dump, "extract scalar result");
1069 /* The result is in the low order bits. */
/* On BITS_BIG_ENDIAN targets the low-order lane sits at the highest bit
   position, hence the (subparts - 1) * element-size offset.  */
1070 if (BITS_BIG_ENDIAN)
1071 bitpos = size_binop (MULT_EXPR,
1072 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1073 TYPE_SIZE (scalar_type));
1075 bitpos = bitsize_zero_node;
1077 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1078 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1079 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest, rhs);
1080 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1081 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1082 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1083 if (vect_print_dump_info (REPORT_DETAILS))
1084 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1088 /* 2.4 Adjust the final result by the initial value of the reduction
1089 variable. (when such adjustment is not needed, then
1090 'scalar_initial_def' is zero).
1093 s_out = scalar_expr <s_out, scalar_initial_def> */
1095 if (scalar_initial_def)
1097 epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
1098 build2 (code, scalar_type, new_temp, scalar_initial_def));
1099 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1100 TREE_OPERAND (epilog_stmt, 0) = new_temp;
1101 bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
1103 if (vect_print_dump_info (REPORT_DETAILS))
1104 print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
1108 /* 2.5 Replace uses of s_out0 with uses of s_out3 */
1110 /* Find the loop-closed-use at the loop exit of the original
1111 scalar result. (The reduction result is expected to have
1112 two immediate uses - one at the latch block, and one at the
1115 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1117 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1119 exit_phi = USE_STMT (use_p);
1124 orig_name = PHI_RESULT (exit_phi);
/* Redirect every use of the old exit-phi result to the reduced scalar.  */
1126 FOR_EACH_IMM_USE_SAFE (use_p, imm_iter, orig_name)
1127 SET_USE (use_p, new_temp);
1131 /* Function vectorizable_reduction.
1133 Check if STMT performs a reduction operation that can be vectorized.
1134 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1135 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1136 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* NOTE(review): this listing is elided (the embedded source-line numbers are
   non-contiguous); comments below describe only the statements visible here.  */
1139 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1145 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1146 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1147 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1148 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1150 enum tree_code code, reduc_code = 0;
1151 enum machine_mode vec_mode;
1153 optab optab, reduc_optab;
1155 tree def0, def1, def_stmt0, def_stmt1;
1156 enum vect_def_type dt0, dt1;
1159 bool is_simple_use0;
1160 bool is_simple_use1;
1162 /* Is vectorizable reduction? */
1164 /* Not supportable if the reduction variable is used in the loop. */
1165 if (STMT_VINFO_RELEVANT_P (stmt_info))
1168 if (!STMT_VINFO_LIVE_P (stmt_info))
1171 /* Make sure it was already recognized as a reduction pattern. */
1172 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
1175 gcc_assert (TREE_CODE (stmt) == MODIFY_EXPR);
/* The reduction computation is expected to be a binary operation
   (s = s <op> x); anything else is rejected below.  */
1177 operation = TREE_OPERAND (stmt, 1);
1178 code = TREE_CODE (operation);
1179 op_type = TREE_CODE_LENGTH (code);
1181 if (op_type != binary_op)
1184 op0 = TREE_OPERAND (operation, 0);
1185 op1 = TREE_OPERAND (operation, 1);
1186 scalar_dest = TREE_OPERAND (stmt, 0);
1187 scalar_type = TREE_TYPE (scalar_dest);
1189 /* Check the first operand. It is expected to be defined inside the loop. */
1191 vect_is_simple_use (op0, loop_vinfo, &def_stmt0, &def0, &dt0);
/* The second operand is the reduction variable itself, defined by a
   loop-header PHI node (asserted below).  */
1193 vect_is_simple_use (op1, loop_vinfo, &def_stmt1, &def1, &dt1);
1195 gcc_assert (is_simple_use0);
1196 gcc_assert (is_simple_use1);
1197 gcc_assert (dt0 == vect_loop_def);
1198 gcc_assert (dt1 == vect_reduction_def);
1199 gcc_assert (TREE_CODE (def_stmt1) == PHI_NODE);
1200 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt1));
1202 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt1)))
1205 /* Supportable by target? */
1207 /* check support for the operation in the loop */
1208 optab = optab_for_tree_code (code, vectype);
1211 if (vect_print_dump_info (REPORT_DETAILS))
1212 fprintf (vect_dump, "no optab.");
1215 vec_mode = TYPE_MODE (vectype);
1216 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1218 if (vect_print_dump_info (REPORT_DETAILS))
1219 fprintf (vect_dump, "op not supported by target.");
/* No vector insn for this op: fall back to word_mode "generic vectors",
   but only if the vectorization factor makes that worthwhile.  */
1220 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1221 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1222 < vect_min_worthwhile_factor (code))
1224 if (vect_print_dump_info (REPORT_DETAILS))
1225 fprintf (vect_dump, "proceeding using word mode.");
1228 /* Worthwhile without SIMD support? */
1229 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1230 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1231 < vect_min_worthwhile_factor (code))
1233 if (vect_print_dump_info (REPORT_DETAILS))
1234 fprintf (vect_dump, "not worthwhile without SIMD support.");
1238 /* check support for the epilog operation */
1239 if (!reduction_code_for_scalar_code (code, &reduc_code))
1241 reduc_optab = optab_for_tree_code (reduc_code, vectype);
1244 if (vect_print_dump_info (REPORT_DETAILS))
1245 fprintf (vect_dump, "no optab for reduction.");
1246 reduc_code = NUM_TREE_CODES;
1248 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
1250 if (vect_print_dump_info (REPORT_DETAILS))
1251 fprintf (vect_dump, "reduc op not supported by target.");
/* NUM_TREE_CODES is used as a sentinel meaning "no target reduction insn";
   the epilog code is then expected to be generated another way.  */
1252 reduc_code = NUM_TREE_CODES;
1255 if (!vec_stmt) /* transformation not required. */
1257 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
/* Transform phase starts here.  */
1263 if (vect_print_dump_info (REPORT_DETAILS))
1264 fprintf (vect_dump, "transform reduction.");
1266 /* Create the destination vector */
1267 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1270 /* Create the reduction-phi that defines the reduction-operand. */
1271 new_phi = create_phi_node (vec_dest, loop->header);
1274 /* Prepare the operand that is defined inside the loop body */
1275 loop_vec_def = vect_get_vec_def_for_operand (op0, stmt, NULL);
1277 /* Create the vectorized operation that computes the partial results */
1278 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1279 build2 (code, vectype, loop_vec_def, PHI_RESULT (new_phi)));
1280 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1281 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1282 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1285 /* Finalize the reduction-phi (set its arguments) and create the
1286 epilog reduction code. */
1287 vect_create_epilog_for_reduction (new_temp, stmt, op1, reduc_code, new_phi);
1292 /* Function vectorizable_assignment.
1294 Check if STMT performs an assignment (copy) that can be vectorized.
1295 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1296 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1297 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze (and, when VEC_STMT is non-NULL, transform) a plain SSA copy.
   NOTE(review): elided listing — only the visible statements are annotated.  */
1300 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1306 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1307 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1308 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1311 enum vect_def_type dt;
1313 /* Is vectorizable assignment? */
1314 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1317 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1319 if (TREE_CODE (stmt) != MODIFY_EXPR)
1322 scalar_dest = TREE_OPERAND (stmt, 0);
1323 if (TREE_CODE (scalar_dest) != SSA_NAME)
1326 op = TREE_OPERAND (stmt, 1);
1327 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1329 if (vect_print_dump_info (REPORT_DETAILS))
1330 fprintf (vect_dump, "use not simple.");
/* Analysis-only call: just record the stmt kind for the transform phase.  */
1334 if (!vec_stmt) /* transformation not required. */
1336 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1341 if (vect_print_dump_info (REPORT_DETAILS))
1342 fprintf (vect_dump, "transform assignment.");
/* Handle def.  */
1345 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Handle use.  */
1348 op = TREE_OPERAND (stmt, 1);
1349 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
1351 /* Arguments are ready. create the new vector stmt. */
1352 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1353 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1354 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1355 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1361 /* Function vect_min_worthwhile_factor.
1363 For a loop where we could vectorize the operation indicated by CODE,
1364 return the minimum vectorization factor that makes it worthwhile
1365 to use generic vectors. */
/* NOTE(review): the body of this function is elided from this listing;
   only the signature is visible.  Per the header comment above, it maps a
   tree code to the minimum vectorization factor that makes generic
   (non-SIMD) vectorization worthwhile.  */
1367 vect_min_worthwhile_factor (enum tree_code code)
1388 /* Function vectorizable_operation.
1390 Check if STMT performs a binary or unary operation that can be vectorized.
1391 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1392 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1393 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze/transform a unary or binary arithmetic stmt.
   NOTE(review): elided listing — only the visible statements are annotated.  */
1396 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1401 tree op0, op1 = NULL;
1402 tree vec_oprnd0, vec_oprnd1=NULL;
1403 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1404 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1405 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1407 enum tree_code code;
1408 enum machine_mode vec_mode;
1414 enum machine_mode optab_op2_mode;
1416 enum vect_def_type dt;
1418 /* Is STMT a vectorizable binary/unary operation? */
1419 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1422 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1424 if (STMT_VINFO_LIVE_P (stmt_info))
1426 /* FORNOW: not yet supported. */
1427 if (vect_print_dump_info (REPORT_DETAILS))
1428 fprintf (vect_dump, "value used after loop.");
1432 if (TREE_CODE (stmt) != MODIFY_EXPR)
1435 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1438 operation = TREE_OPERAND (stmt, 1);
1439 code = TREE_CODE (operation);
1440 optab = optab_for_tree_code (code, vectype);
1442 /* Support only unary or binary operations. */
1443 op_type = TREE_CODE_LENGTH (code);
1444 if (op_type != unary_op && op_type != binary_op)
1446 if (vect_print_dump_info (REPORT_DETAILS))
1447 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
/* All operands must be "simple uses" (loop defs, invariants or constants).  */
1451 for (i = 0; i < op_type; i++)
1453 op = TREE_OPERAND (operation, i);
1454 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1456 if (vect_print_dump_info (REPORT_DETAILS))
1457 fprintf (vect_dump, "use not simple.");
1462 /* Supportable by target? */
1465 if (vect_print_dump_info (REPORT_DETAILS))
1466 fprintf (vect_dump, "no optab.");
1469 vec_mode = TYPE_MODE (vectype);
1470 icode = (int) optab->handlers[(int) vec_mode].insn_code;
1471 if (icode == CODE_FOR_nothing)
1473 if (vect_print_dump_info (REPORT_DETAILS))
1474 fprintf (vect_dump, "op not supported by target.");
/* No vector insn: fall back to word_mode "generic vectors" when the
   vectorization factor makes that worthwhile.  */
1475 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
1476 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1477 < vect_min_worthwhile_factor (code))
1479 if (vect_print_dump_info (REPORT_DETAILS))
1480 fprintf (vect_dump, "proceeding using word mode.");
1483 /* Worthwhile without SIMD support? */
1484 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1485 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
1486 < vect_min_worthwhile_factor (code))
1488 if (vect_print_dump_info (REPORT_DETAILS))
1489 fprintf (vect_dump, "not worthwhile without SIMD support.");
/* Shifts need special checking: the target pattern may take the shift
   amount in a scalar (non-vector) mode.  */
1493 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1495 /* FORNOW: not yet supported. */
1496 if (!VECTOR_MODE_P (vec_mode))
1499 /* Invariant argument is needed for a vector shift
1500 by a scalar shift operand. */
1501 optab_op2_mode = insn_data[icode].operand[2].mode;
1502 if (! (VECTOR_MODE_P (optab_op2_mode)
1503 || dt == vect_constant_def
1504 || dt == vect_invariant_def))
1506 if (vect_print_dump_info (REPORT_DETAILS))
1507 fprintf (vect_dump, "operand mode requires invariant argument.");
1512 if (!vec_stmt) /* transformation not required. */
1514 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
/* Transform phase starts here.  */
1520 if (vect_print_dump_info (REPORT_DETAILS))
1521 fprintf (vect_dump, "transform binary/unary operation.");
1524 scalar_dest = TREE_OPERAND (stmt, 0);
1525 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1528 op0 = TREE_OPERAND (operation, 0);
1529 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
1531 if (op_type == binary_op)
1533 op1 = TREE_OPERAND (operation, 1);
1535 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
1537 /* Vector shl and shr insn patterns can be defined with
1538 scalar operand 2 (shift operand). In this case, use
1539 constant or loop invariant op1 directly, without
1540 extending it to vector mode first. */
1542 optab_op2_mode = insn_data[icode].operand[2].mode;
1543 if (!VECTOR_MODE_P (optab_op2_mode))
1545 if (vect_print_dump_info (REPORT_DETAILS))
1546 fprintf (vect_dump, "operand 1 using scalar mode.");
1552 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
1555 /* Arguments are ready. create the new vector stmt. */
1557 if (op_type == binary_op)
1558 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1559 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
1561 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
1562 build1 (code, vectype, vec_oprnd0));
1563 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1564 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1565 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1571 /* Function vectorizable_store.
1573 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
1575 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1576 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1577 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze/transform a store to an array/pointer data-ref.
   NOTE(review): elided listing — only the visible statements are annotated.
   ("alignment_support_cheme" is the historical misspelled identifier used
   throughout this file; it is left as-is here.)  */
1580 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1586 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1587 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1588 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1589 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1590 enum machine_mode vec_mode;
1592 enum dr_alignment_support alignment_support_cheme;
1595 enum vect_def_type dt;
1597 /* Is vectorizable store? */
1599 if (TREE_CODE (stmt) != MODIFY_EXPR)
1602 scalar_dest = TREE_OPERAND (stmt, 0);
1603 if (TREE_CODE (scalar_dest) != ARRAY_REF
1604 && TREE_CODE (scalar_dest) != INDIRECT_REF)
1607 op = TREE_OPERAND (stmt, 1);
1608 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1610 if (vect_print_dump_info (REPORT_DETAILS))
1611 fprintf (vect_dump, "use not simple.");
1615 vec_mode = TYPE_MODE (vectype);
1616 /* FORNOW. In some cases can vectorize even if data-type not supported
1617 (e.g. - array initialization with 0). */
1618 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
1621 if (!STMT_VINFO_DATA_REF (stmt_info))
1625 if (!vec_stmt) /* transformation not required. */
1627 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
/* Transform phase starts here.  */
1633 if (vect_print_dump_info (REPORT_DETAILS))
1634 fprintf (vect_dump, "transform store");
/* Only naturally aligned stores are supported at this point.  */
1636 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1637 gcc_assert (alignment_support_cheme);
1638 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
1640 /* Handle use - get the vectorized def from the defining stmt. */
1641 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt, NULL);
1644 /* FORNOW: make sure the data reference is aligned. */
1645 vect_align_data_ref (stmt);
1646 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1647 data_ref = build_fold_indirect_ref (data_ref);
1649 /* Arguments are ready. create the new vector stmt. */
1650 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
1651 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1653 /* Copy the V_MAY_DEFS representing the aliasing of the original array
1654 element's definition to the vector's definition then update the
1655 defining statement. The original is being deleted so the same
1656 SSA_NAMEs can be used. */
1657 copy_virtual_operands (*vec_stmt, stmt);
1659 FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_VMAYDEF)
1661 SSA_NAME_DEF_STMT (def) = *vec_stmt;
1663 /* If this virtual def has a use outside the loop and a loop peel is
1664 performed then the def may be renamed by the peel. Mark it for
1665 renaming so the later use will also be renamed. */
1666 mark_sym_for_renaming (SSA_NAME_VAR (def));
1673 /* vectorizable_load.
1675 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
1677 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1678 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1679 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze/transform a load from an array/pointer data-ref.  Handles two
   schemes: a direct (aligned or target-supported misaligned) load, and the
   software-pipelined realignment scheme using REALIGN_LOAD_EXPR.
   NOTE(review): elided listing — only the visible statements are annotated.  */
1682 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1685 tree vec_dest = NULL;
1686 tree data_ref = NULL;
1688 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1689 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1690 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1698 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1699 edge pe = loop_preheader_edge (loop);
1700 enum dr_alignment_support alignment_support_cheme;
1702 /* Is vectorizable load? */
1703 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1706 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
1708 if (STMT_VINFO_LIVE_P (stmt_info))
1710 /* FORNOW: not yet supported. */
1711 if (vect_print_dump_info (REPORT_DETAILS))
1712 fprintf (vect_dump, "value used after loop.");
1716 if (TREE_CODE (stmt) != MODIFY_EXPR)
1719 scalar_dest = TREE_OPERAND (stmt, 0);
1720 if (TREE_CODE (scalar_dest) != SSA_NAME)
1723 op = TREE_OPERAND (stmt, 1);
1724 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
1727 if (!STMT_VINFO_DATA_REF (stmt_info))
1730 mode = (int) TYPE_MODE (vectype);
1732 /* FORNOW. In some cases can vectorize even if data-type not supported
1733 (e.g. - data copies). */
1734 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
1736 if (vect_print_dump_info (REPORT_DETAILS))
1737 fprintf (vect_dump, "Aligned load, but unsupported type.");
1741 if (!vec_stmt) /* transformation not required. */
1743 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
/* Transform phase starts here.  */
1749 if (vect_print_dump_info (REPORT_DETAILS))
1750 fprintf (vect_dump, "transform load.");
1752 alignment_support_cheme = vect_supportable_dr_alignment (dr);
1753 gcc_assert (alignment_support_cheme);
/* Case 1: aligned load, or misaligned load the target supports directly
   (via MISALIGNED_INDIRECT_REF).  */
1755 if (alignment_support_cheme == dr_aligned
1756 || alignment_support_cheme == dr_unaligned_supported)
1767 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1768 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
1769 if (aligned_access_p (dr))
1770 data_ref = build_fold_indirect_ref (data_ref);
/* Misaligned but target-supported: encode the known misalignment (in
   bits; -1 means unknown, encoded as 0) in a MISALIGNED_INDIRECT_REF.  */
1773 int mis = DR_MISALIGNMENT (dr);
1774 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
1775 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
1776 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
1778 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1779 new_temp = make_ssa_name (vec_dest, new_stmt);
1780 TREE_OPERAND (new_stmt, 0) = new_temp;
1781 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1782 copy_virtual_operands (new_stmt, stmt);
/* Case 2: software-pipelined realignment scheme (sketched below).  */
1784 else if (alignment_support_cheme == dr_unaligned_software_pipeline)
1788 msq_init = *(floor(p1))
1789 p2 = initial_addr + VS - 1;
1790 magic = have_builtin ? builtin_result : initial_address;
1793 p2' = p2 + indx * vectype_size
1795 vec_dest = realign_load (msq, lsq, magic)
1809 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
1810 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1811 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
1813 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
1814 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1815 new_temp = make_ssa_name (vec_dest, new_stmt);
1816 TREE_OPERAND (new_stmt, 0) = new_temp;
/* Inserting on the preheader edge must not split it into a new block.  */
1817 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1818 gcc_assert (!new_bb);
1819 msq_init = TREE_OPERAND (new_stmt, 0);
1820 copy_virtual_operands (new_stmt, stmt);
1821 update_vuses_to_preheader (new_stmt, loop);
1824 /* <2> Create lsq = *(floor(p2')) in the loop */
/* Offset by VS - 1 elements so the two aligned loads straddle the
   unaligned access.  */
1825 offset = build_int_cst (integer_type_node,
1826 TYPE_VECTOR_SUBPARTS (vectype));
1827 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
1828 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1829 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
1830 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
1831 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
1832 new_temp = make_ssa_name (vec_dest, new_stmt);
1833 TREE_OPERAND (new_stmt, 0) = new_temp;
1834 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1835 lsq = TREE_OPERAND (new_stmt, 0);
1836 copy_virtual_operands (new_stmt, stmt);
/* <3> Compute "magic": the realignment token — either the target's
   mask-for-load builtin result, or the current data-ref address.  */
1840 if (targetm.vectorize.builtin_mask_for_load)
1842 /* Create permutation mask, if required, in loop preheader. */
1844 params = build_tree_list (NULL_TREE, init_addr);
1845 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1846 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
1847 new_stmt = build_function_call_expr (builtin_decl, params);
1848 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1849 new_temp = make_ssa_name (vec_dest, new_stmt);
1850 TREE_OPERAND (new_stmt, 0) = new_temp;
1851 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
1852 gcc_assert (!new_bb);
1853 magic = TREE_OPERAND (new_stmt, 0);
1855 /* The result of the CALL_EXPR to this builtin is determined from
1856 the value of the parameter and no global variables are touched
1857 which makes the builtin a "const" function. Requiring the
1858 builtin to have the "const" attribute makes it unnecessary
1859 to call mark_call_clobbered_vars_to_rename. */
1860 gcc_assert (TREE_READONLY (builtin_decl));
1864 /* Use current address instead of init_addr for reduced reg pressure.
1866 magic = dataref_ptr;
1870 /* <4> Create msq = phi <msq_init, lsq> in loop */
1871 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1872 msq = make_ssa_name (vec_dest, NULL_TREE);
1873 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
1874 SSA_NAME_DEF_STMT (msq) = phi_stmt;
1875 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
1876 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
1879 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
1880 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1881 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
1882 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
1883 new_temp = make_ssa_name (vec_dest, new_stmt);
1884 TREE_OPERAND (new_stmt, 0) = new_temp;
1885 vect_finish_stmt_generation (stmt, new_stmt, bsi);
1890 *vec_stmt = new_stmt;
1895 /* Function vectorizable_live_operation.
1897 STMT computes a value that is used outside the loop. Check if
1898 it can be supported. */
/* Check whether a stmt whose value is live after the loop can be handled.
   NOTE(review): elided listing — only the visible statements are annotated.  */
1901 vectorizable_live_operation (tree stmt,
1902 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
1903 tree *vec_stmt ATTRIBUTE_UNUSED)
1906 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1907 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1909 enum tree_code code;
1913 enum vect_def_type dt;
1915 if (!STMT_VINFO_LIVE_P (stmt_info))
1918 if (TREE_CODE (stmt) != MODIFY_EXPR)
1921 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
1924 operation = TREE_OPERAND (stmt, 1);
1925 code = TREE_CODE (operation);
1927 op_type = TREE_CODE_LENGTH (code);
1929 /* FORNOW: support only if all uses are invariant. This means
1930 that the scalar operations can remain in place, unvectorized.
1931 The original last scalar value that they compute will be used. */
1933 for (i = 0; i < op_type; i++)
1935 op = TREE_OPERAND (operation, i);
1936 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
1938 if (vect_print_dump_info (REPORT_DETAILS))
1939 fprintf (vect_dump, "use not simple.");
/* Any operand defined inside the loop disqualifies the stmt (for now).  */
1943 if (dt != vect_invariant_def && dt != vect_constant_def)
1947 /* No transformation is required for the cases we currently support. */
1952 /* Function vect_is_simple_cond.
1955 LOOP - the loop that is being vectorized.
1956 COND - Condition that is checked for simple use.
1958 Returns whether a COND can be vectorized. Checks whether
1959 condition operands are supportable using vec_is_simple_use. */
/* Each comparison operand must be an SSA name that is a "simple use",
   or an integer/real constant.
   NOTE(review): elided listing — only the visible statements are annotated.  */
1962 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
1966 enum vect_def_type dt;
1968 if (!COMPARISON_CLASS_P (cond))
1971 lhs = TREE_OPERAND (cond, 0);
1972 rhs = TREE_OPERAND (cond, 1);
1974 if (TREE_CODE (lhs) == SSA_NAME)
1976 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
1977 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
1980 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
/* Same check for the right-hand operand.  */
1983 if (TREE_CODE (rhs) == SSA_NAME)
1985 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
1986 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
1989 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
1995 /* vectorizable_condition.
1997 Check if STMT is conditional modify expression that can be vectorized.
1998 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1999 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
2002 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Analyze/transform a COND_EXPR assignment into a VEC_COND_EXPR.
   NOTE(review): elided listing — only the visible statements are annotated.  */
2005 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2007 tree scalar_dest = NULL_TREE;
2008 tree vec_dest = NULL_TREE;
2009 tree op = NULL_TREE;
2010 tree cond_expr, then_clause, else_clause;
2011 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2012 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2013 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
2014 tree vec_compare, vec_cond_expr;
2016 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2017 enum machine_mode vec_mode;
2019 enum vect_def_type dt;
2021 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2024 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_loop_def);
2026 if (STMT_VINFO_LIVE_P (stmt_info))
2028 /* FORNOW: not yet supported. */
2029 if (vect_print_dump_info (REPORT_DETAILS))
2030 fprintf (vect_dump, "value used after loop.");
2034 if (TREE_CODE (stmt) != MODIFY_EXPR)
2037 op = TREE_OPERAND (stmt, 1);
2039 if (TREE_CODE (op) != COND_EXPR)
2042 cond_expr = TREE_OPERAND (op, 0);
2043 then_clause = TREE_OPERAND (op, 1);
2044 else_clause = TREE_OPERAND (op, 2);
2046 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
/* The THEN/ELSE clauses must each be a simple-use SSA name or an
   integer/real constant.  */
2049 if (TREE_CODE (then_clause) == SSA_NAME)
2051 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
2052 if (!vect_is_simple_use (then_clause, loop_vinfo,
2053 &then_def_stmt, &def, &dt))
2056 else if (TREE_CODE (then_clause) != INTEGER_CST
2057 && TREE_CODE (then_clause) != REAL_CST)
2060 if (TREE_CODE (else_clause) == SSA_NAME)
2062 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
2063 if (!vect_is_simple_use (else_clause, loop_vinfo,
2064 &else_def_stmt, &def, &dt))
2067 else if (TREE_CODE (else_clause) != INTEGER_CST
2068 && TREE_CODE (else_clause) != REAL_CST)
2072 vec_mode = TYPE_MODE (vectype);
/* Analysis-only call: supportable iff the target can expand the
   VEC_COND_EXPR in this mode.  */
2076 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
2077 return expand_vec_cond_expr_p (op, vec_mode);
/* Transform phase starts here.  */
2083 scalar_dest = TREE_OPERAND (stmt, 0);
2084 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2086 /* Handle cond expr. */
2088 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
2090 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
2091 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
2092 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
2094 /* Arguments are ready. create the new vector stmt. */
2095 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
2096 vec_cond_lhs, vec_cond_rhs);
2097 vec_cond_expr = build (VEC_COND_EXPR, vectype,
2098 vec_compare, vec_then_clause, vec_else_clause);
2100 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_cond_expr);
2101 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2102 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2103 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2108 /* Function vect_transform_stmt.
2110 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatch to the per-kind vectorizable_* routine recorded during
   analysis (STMT_VINFO_TYPE), first for relevant stmts, then for stmts
   whose value is live after the loop.
   NOTE(review): elided listing — only the visible statements are annotated.  */
2113 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2115 bool is_store = false;
2116 tree vec_stmt = NULL_TREE;
2117 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2120 if (STMT_VINFO_RELEVANT_P (stmt_info))
2122 switch (STMT_VINFO_TYPE (stmt_info))
2124 case op_vec_info_type:
2125 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2129 case assignment_vec_info_type:
2130 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2134 case load_vec_info_type:
2135 done = vectorizable_load (stmt, bsi, &vec_stmt);
2139 case store_vec_info_type:
2140 done = vectorizable_store (stmt, bsi, &vec_stmt);
2145 case condition_vec_info_type:
2146 done = vectorizable_condition (stmt, bsi, &vec_stmt);
2151 if (vect_print_dump_info (REPORT_DETAILS))
2152 fprintf (vect_dump, "stmt not supported.");
2156 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
/* Stmts that are live after the loop: reductions get their own handler,
   everything else goes through vectorizable_live_operation.  */
2159 if (STMT_VINFO_LIVE_P (stmt_info))
2161 switch (STMT_VINFO_TYPE (stmt_info))
2163 case reduc_vec_info_type:
2164 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
2169 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
2175 gcc_assert (!STMT_VINFO_VEC_STMT (stmt_info));
2176 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2184 /* This function builds ni_name = number of iterations loop executes
2185 on the loop preheader. */
/* Gimplify the loop's iteration count into a fresh "niters" temporary and
   insert any generated stmts on the preheader edge.
   NOTE(review): elided listing — only the visible statements are annotated.  */
2188 vect_build_loop_niters (loop_vec_info loop_vinfo)
2190 tree ni_name, stmt, var;
2192 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2193 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
2195 var = create_tmp_var (TREE_TYPE (ni), "niters");
2196 add_referenced_tmp_var (var);
2197 ni_name = force_gimple_operand (ni, &stmt, false, var);
2199 pe = loop_preheader_edge (loop);
/* Inserting on the preheader edge must not create a new basic block.  */
2202 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2203 gcc_assert (!new_bb);
2210 /* This function generates the following statements:
2212 ni_name = number of iterations loop executes
2213 ratio = ni_name / vf
2214 ratio_mult_vf_name = ratio * vf
2216 and places them at the loop preheader edge. */
/* Since VF is a power of two (exact_log2 below), the division and
   multiplication by VF are generated as shifts.
   NOTE(review): elided listing — only the visible statements are annotated.  */
2219 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
2221 tree *ratio_mult_vf_name_ptr,
2222 tree *ratio_name_ptr)
2230 tree ratio_mult_vf_name;
2231 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2232 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
2233 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2234 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
2236 pe = loop_preheader_edge (loop);
2238 /* Generate temporary variable that contains
2239 number of iterations loop executes. */
2241 ni_name = vect_build_loop_niters (loop_vinfo);
2243 /* Create: ratio = ni >> log2(vf) */
2245 var = create_tmp_var (TREE_TYPE (ni), "bnd");
2246 add_referenced_tmp_var (var);
2247 ratio_name = make_ssa_name (var, NULL_TREE);
2248 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
2249 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
2250 SSA_NAME_DEF_STMT (ratio_name) = stmt;
/* Insertions on the preheader edge must not create a new basic block.  */
2252 pe = loop_preheader_edge (loop);
2253 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2254 gcc_assert (!new_bb);
2256 /* Create: ratio_mult_vf = ratio << log2 (vf). */
2258 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
2259 add_referenced_tmp_var (var);
2260 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
2261 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
2262 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
2263 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
2265 pe = loop_preheader_edge (loop);
2266 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2267 gcc_assert (!new_bb);
/* Return all three names to the caller.  */
2269 *ni_name_ptr = ni_name;
2270 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
2271 *ratio_name_ptr = ratio_name;
2277 /* Function update_vuses_to_preheader.
2280 STMT - a statement with potential VUSEs.
2281 LOOP - the loop whose preheader will contain STMT.
2283 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
2284 appears to be defined in a V_MAY_DEF in another statement in a loop.
2285 One such case is when the VUSE is at the dereference of a __restricted__
2286 pointer in a load and the V_MAY_DEF is at the dereference of a different
2287 __restricted__ pointer in a store. Vectorization may result in
2288 copy_virtual_uses being called to copy the problematic VUSE to a new
2289 statement that is being inserted in the loop preheader. This procedure
2290 is called to change the SSA_NAME in the new statement's VUSE from the
2291 SSA_NAME updated in the loop to the related SSA_NAME available on the
2292 path entering the loop.
2294 When this function is called, we have the following situation:
2299 # name1 = phi < name0 , name2>
2304 # name2 = vdef <name1>
2309 Stmt S1 was created in the loop preheader block as part of misaligned-load
2310 handling. This function fixes the name of the vuse of S1 from 'name1' to 'name0'.
2314 update_vuses_to_preheader (tree stmt, struct loop *loop)
2316 basic_block header_bb = loop->header;
2317 edge preheader_e = loop_preheader_edge (loop);
2319 use_operand_p use_p;
/* Walk every virtual-use (VUSE) operand of STMT.  */
2321 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
2323 tree ssa_name = USE_FROM_PTR (use_p);
2324 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
2325 tree name_var = SSA_NAME_VAR (ssa_name);
2326 basic_block bb = bb_for_stmt (def_stmt);
2328 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
2329 if (!IS_EMPTY_STMT (def_stmt)
2330 && flow_bb_inside_loop_p (loop, bb))
2332 /* If the block containing the statement defining the SSA_NAME
2333 is in the loop then it's necessary to find the definition
2334 outside the loop using the PHI nodes of the header. */
2336 bool updated = false;
/* Scan the loop-header PHIs for the one merging this variable; its
   argument on the preheader edge is the definition live on loop entry.  */
2338 for (phi = phi_nodes (header_bb); phi; phi = TREE_CHAIN (phi))
2340 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
/* Rewrite the VUSE to the name flowing in from the preheader edge.  */
2342 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
/* NOTE(review): the statement(s) setting 'updated' to true after a
   successful match were lost in extraction; this assert checks that a
   matching header PHI was found for the replaced VUSE.  */
2347 gcc_assert (updated);
2353 /* Function vect_update_ivs_after_vectorizer.
2355 "Advance" the induction variables of LOOP to the value they should take
2356 after the execution of LOOP. This is currently necessary because the
2357 vectorizer does not handle induction variables that are used after the
2358 loop. Such a situation occurs when the last iterations of LOOP are
2360 1. We introduced new uses after LOOP for IVs that were not originally used
2361 after LOOP: the IVs of LOOP are now used by an epilog loop.
2362 2. LOOP is going to be vectorized; this means that it will iterate N/VF
2363 times, whereas the loop IVs should be bumped N times.
2366 - LOOP - a loop that is going to be vectorized. The last few iterations
2367 of LOOP were peeled.
2368 - NITERS - the number of iterations that LOOP executes (before it is
2369 vectorized). i.e, the number of times the ivs should be bumped.
2370 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
2371 coming out from LOOP on which there are uses of the LOOP ivs
2372 (this is the path from LOOP->exit to epilog_loop->preheader).
2374 The new definitions of the ivs are placed in LOOP->exit.
2375 The phi args associated with the edge UPDATE_E in the bb
2376 UPDATE_E->dest are updated accordingly.
2378 Assumption 1: Like the rest of the vectorizer, this function assumes
2379 a single loop exit that has a single predecessor.
2381 Assumption 2: The phi nodes in the LOOP header and in update_bb are
2382 organized in the same order.
2384 Assumption 3: The access function of the ivs is simple enough (see
2385 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
2387 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
2388 coming out of LOOP on which the ivs of LOOP are used (this is the path
2389 that leads to the epilog loop; other paths skip the epilog loop). This
2390 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
2391 needs to have its phis updated.
2395 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
2398 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2399 basic_block exit_bb = loop->single_exit->dest;
2401 basic_block update_bb = update_e->dest;
2403 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
2405 /* Make sure there exists a single-predecessor exit bb: */
2406 gcc_assert (single_pred_p (exit_bb));
/* Walk the loop-header PHIs and the update_bb PHIs in lockstep
   (relies on Assumption 2 from the function comment: same order).  */
2408 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
2410 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
2412 tree access_fn = NULL;
2413 tree evolution_part;
2416 tree var, stmt, ni, ni_name;
2417 block_stmt_iterator last_bsi;
2419 if (vect_print_dump_info (REPORT_DETAILS))
2421 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
2422 print_generic_expr (vect_dump, phi, TDF_SLIM);
2425 /* Skip virtual phi's. */
2426 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2428 if (vect_print_dump_info (REPORT_DETAILS))
2429 fprintf (vect_dump, "virtual phi. skip.");
2433 /* Skip reduction phis. */
2434 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
2436 if (vect_print_dump_info (REPORT_DETAILS))
2437 fprintf (vect_dump, "reduc phi. skip.");
/* Analyze the IV's scalar evolution and extract its per-iteration
   evolution (step) within this loop.  */
2441 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
2442 gcc_assert (access_fn);
2444 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
2445 gcc_assert (evolution_part != NULL_TREE);
2447 /* FORNOW: We do not support IVs whose evolution function is a polynomial
2448 of degree >= 2 or exponential. */
2449 gcc_assert (!tree_is_chrec (evolution_part));
2451 step_expr = evolution_part;
2452 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
/* Build: ni = init + niters * step — the value the IV takes after
   NITERS iterations of the loop.  */
2455 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
2456 build2 (MULT_EXPR, TREE_TYPE (niters),
2457 niters, step_expr), init_expr);
2459 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
2460 add_referenced_tmp_var (var);
/* Gimplify NI; STMT receives the statement sequence defining NI_NAME.  */
2462 ni_name = force_gimple_operand (ni, &stmt, false, var);
2464 /* Insert stmt into exit_bb. */
2465 last_bsi = bsi_last (exit_bb);
2467 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
2469 /* Fix phi expressions in the successor bb. */
2470 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
2475 /* Function vect_do_peeling_for_loop_bound
2477 Peel the last iterations of the loop represented by LOOP_VINFO.
2478 The peeled iterations form a new epilog loop. Given that the loop now
2479 iterates NITERS times, the new epilog loop iterates
2480 NITERS % VECTORIZATION_FACTOR times.
2482 The original loop will later be made to iterate
2483 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
2486 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
2487 struct loops *loops)
2489 tree ni_name, ratio_mult_vf_name;
2490 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2491 struct loop *new_loop;
2493 basic_block preheader;
2496 if (vect_print_dump_info (REPORT_DETAILS))
2497 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
2499 initialize_original_copy_tables ();
2501 /* Generate the following variables on the preheader of original loop:
2503 ni_name = number of iteration the original loop executes
2504 ratio = ni_name / vf
2505 ratio_mult_vf_name = ratio * vf */
2506 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
2507 &ratio_mult_vf_name, ratio);
/* Peel off the last n%VF iterations into a new (epilog) loop placed
   after LOOP's single exit; the original loop will run
   ratio_mult_vf_name iterations.  */
2509 loop_num = loop->num;
2510 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->single_exit,
2511 ratio_mult_vf_name, ni_name, false);
2512 gcc_assert (new_loop);
/* Peeling must not have renumbered the original loop.  */
2513 gcc_assert (loop_num == loop->num);
2514 #ifdef ENABLE_CHECKING
2515 slpeel_verify_cfg_after_peeling (loop, new_loop);
2518 /* A guard that controls whether the new_loop is to be executed or skipped
2519 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
2520 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
2521 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
2522 is on the path where the LOOP IVs are used and need to be updated. */
2524 preheader = loop_preheader_edge (new_loop)->src;
2525 if (EDGE_PRED (preheader, 0)->src == loop->single_exit->dest)
2526 update_e = EDGE_PRED (preheader, 0);
2528 update_e = EDGE_PRED (preheader, 1);
2530 /* Update IVs of original loop as if they were advanced
2531 by ratio_mult_vf_name steps. */
2532 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
2534 /* After peeling we have to reset scalar evolution analyzer. */
2537 free_original_copy_tables ();
2541 /* Function vect_gen_niters_for_prolog_loop
2543 Set the number of iterations for the loop represented by LOOP_VINFO
2544 to the minimum between LOOP_NITERS (the original iteration count of the loop)
2545 and the misalignment of DR - the data reference recorded in
2546 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
2547 this loop, the data reference DR will refer to an aligned location.
2549 The following computation is generated:
2551 If the misalignment of DR is known at compile time:
2552 addr_mis = int mis = DR_MISALIGNMENT (dr);
2553 Else, compute address misalignment in bytes:
2554 addr_mis = addr & (vectype_size - 1)
2556 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
2558 (elem_size = element type size; an element is the scalar element
2559 whose type is the inner type of the vectype) */
2562 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
2564 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2565 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2566 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2568 tree iters, iters_name;
2571 tree dr_stmt = DR_STMT (dr);
2572 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
2573 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2574 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
2575 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
2576 tree niters_type = TREE_TYPE (loop_niters);
2578 pe = loop_preheader_edge (loop);
/* Case 1: the misalignment is known at compile time, so the prolog
   iteration count is a constant.  */
2580 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
2582 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
2583 int element_size = vectype_align/vf;
2584 int elem_misalign = byte_misalign / element_size;
2586 if (vect_print_dump_info (REPORT_DETAILS))
2587 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
2588 iters = build_int_cst (niters_type, (vf - elem_misalign)&(vf-1));
/* Case 2 (else-branch; 'else {' was dropped in extraction): the
   misalignment is unknown, so compute it at runtime from the address
   of the first vector access.  */
2592 tree new_stmts = NULL_TREE;
2594 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
2595 tree ptr_type = TREE_TYPE (start_addr);
2596 tree size = TYPE_SIZE (ptr_type);
2597 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2598 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
2599 tree elem_size_log =
2600 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
2601 tree vf_tree = build_int_cst (unsigned_type_node, vf);
/* Emit the address-computation statements on the preheader edge; the
   edge must not get split (no new bb).  */
2605 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
2606 gcc_assert (!new_bb);
2608 /* Create: byte_misalign = addr & (vectype_size - 1) */
2610 build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
2612 /* Create: elem_misalign = byte_misalign / element_size */
2614 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
2616 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
2617 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
2618 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
2619 iters = fold_convert (niters_type, iters);
2622 /* Create: prolog_loop_niters = min (iters, loop_niters) */
2623 /* If the loop bound is known at compile time we already verified that it is
2624 greater than vf; since the misalignment ('iters') is at most vf, there's
2625 no need to generate the MIN_EXPR in this case. */
2626 if (TREE_CODE (loop_niters) != INTEGER_CST)
2627 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
2629 if (vect_print_dump_info (REPORT_DETAILS))
2631 fprintf (vect_dump, "niters for prolog loop: ");
2632 print_generic_expr (vect_dump, iters, TDF_SLIM);
/* Gimplify the expression into ITERS_NAME and emit any needed
   statements on the preheader edge.  */
2635 var = create_tmp_var (niters_type, "prolog_loop_niters");
2636 add_referenced_tmp_var (var);
2637 iters_name = force_gimple_operand (iters, &stmt, false, var);
2639 /* Insert stmt on loop preheader edge. */
2642 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
2643 gcc_assert (!new_bb);
2650 /* Function vect_update_init_of_dr
2652 NITERS iterations were peeled from LOOP. DR represents a data reference
2653 in LOOP. This function updates the information recorded in DR to
2654 account for the fact that the first NITERS iterations had already been
2655 executed. Specifically, it updates the OFFSET field of DR. */
2658 vect_update_init_of_dr (struct data_reference *dr, tree niters)
2660 tree offset = DR_OFFSET (dr);
/* Advance the data reference by NITERS iterations:
   DR_OFFSET += NITERS * DR_STEP.  */
2662 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
2663 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
2664 DR_OFFSET (dr) = offset;
2668 /* Function vect_update_inits_of_drs
2670 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
2671 This function updates the information recorded for the data references in
2672 the loop to account for the fact that the first NITERS iterations had
2673 already been executed. Specifically, it updates the initial_condition of the
2674 access_function of all the data_references in the loop. */
2677 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
2680 varray_type datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2682 if (vect_dump && (dump_flags & TDF_DETAILS))
2683 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
/* Apply vect_update_init_of_dr to every data reference in the loop.  */
2685 for (i = 0; i < VARRAY_ACTIVE_SIZE (datarefs); i++)
2687 struct data_reference *dr = VARRAY_GENERIC_PTR (datarefs, i);
2688 vect_update_init_of_dr (dr, niters);
2693 /* Function vect_do_peeling_for_alignment
2695 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
2696 'niters' is set to the misalignment of one of the data references in the
2697 loop, thereby forcing it to refer to an aligned location at the beginning
2698 of the execution of this loop. The data reference for which we are
2699 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
2702 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
2704 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2705 tree niters_of_prolog_loop, ni_name;
2707 struct loop *new_loop;
2709 if (vect_print_dump_info (REPORT_DETAILS))
2710 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
2712 initialize_original_copy_tables ();
/* Compute the original iteration count and, from it and the unaligned
   DR's misalignment, the number of prolog iterations to peel.  */
2714 ni_name = vect_build_loop_niters (loop_vinfo);
2715 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
2717 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
2719 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
2720 niters_of_prolog_loop, ni_name, true);
2721 gcc_assert (new_loop);
2722 #ifdef ENABLE_CHECKING
2723 slpeel_verify_cfg_after_peeling (new_loop, loop);
2726 /* Update number of times loop executes. */
2727 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
/* The vectorized loop now runs NITERS - prolog_niters iterations.  */
2728 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
2729 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
2731 /* Update the init conditions of the access functions of all data refs. */
2732 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
2734 /* After peeling we have to reset scalar evolution analyzer. */
2737 free_original_copy_tables ();
2741 /* Function vect_create_cond_for_align_checks.
2743 Create a conditional expression that represents the alignment checks for
2744 all of data references (array element references) whose alignment must be
2748 LOOP_VINFO - two fields of the loop information are used.
2749 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
2750 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
2753 COND_EXPR_STMT_LIST - statements needed to construct the conditional
2755 The returned value is the conditional expression to be used in the if
2756 statement that controls which version of the loop gets executed at runtime.
2758 The algorithm makes two assumptions:
2759 1) The number of bytes "n" in a vector is a power of 2.
2760 2) An address "a" is aligned if a%n is zero and that this
2761 test can be done as a&(n-1) == 0. For example, for 16
2762 byte vectors the test is a&0xf == 0. */
2765 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
2766 tree *cond_expr_stmt_list)
2768 VEC(tree,heap) *may_misalign_stmts
2769 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
2771 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
2775 tree int_ptrsize_type;
2777 tree or_tmp_name = NULL_TREE;
2778 tree and_tmp, and_tmp_name, and_stmt;
2781 /* Check that mask is one less than a power of 2, i.e., mask is
2782 all zeros followed by all ones. */
2783 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
2785 /* CHECKME: what is the best integer or unsigned type to use to hold a
2786 cast from a pointer value? */
/* Build a signed integer type the same width as a pointer, used for
   all the address arithmetic below.  */
2787 psize = TYPE_SIZE (ptr_type_node);
2789 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
2791 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
2792 of the first vector of the i'th data reference. */
2794 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
2796 tree new_stmt_list = NULL_TREE;
2798 tree addr_tmp, addr_tmp_name, addr_stmt;
2799 tree or_tmp, new_or_tmp_name, or_stmt;
2801 /* create: addr_tmp = (int)(address_of_first_vector) */
2802 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
/* Emit any statements the address computation needed before using it.  */
2806 if (new_stmt_list != NULL_TREE)
2807 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
2809 sprintf (tmp_name, "%s%d", "addr2int", i);
2810 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2811 add_referenced_tmp_var (addr_tmp);
2812 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
2813 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
2814 addr_stmt = build2 (MODIFY_EXPR, void_type_node,
2815 addr_tmp_name, addr_stmt);
2816 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
2817 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
2819 /* The addresses are OR together. */
2821 if (or_tmp_name != NULL_TREE)
2823 /* create: or_tmp = or_tmp | addr_tmp */
2824 sprintf (tmp_name, "%s%d", "orptrs", i);
2825 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
2826 add_referenced_tmp_var (or_tmp);
2827 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
2828 or_stmt = build2 (MODIFY_EXPR, void_type_node, new_or_tmp_name,
2829 build2 (BIT_IOR_EXPR, int_ptrsize_type,
2832 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
2833 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
2834 or_tmp_name = new_or_tmp_name;
/* First iteration (else-arm; 'else' dropped in extraction): seed the
   OR chain with the first address.  */
2837 or_tmp_name = addr_tmp_name;
2841 mask_cst = build_int_cst (int_ptrsize_type, mask);
2843 /* create: and_tmp = or_tmp & mask */
2844 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
2845 add_referenced_tmp_var (and_tmp);
2846 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
2848 and_stmt = build2 (MODIFY_EXPR, void_type_node,
2850 build2 (BIT_AND_EXPR, int_ptrsize_type,
2851 or_tmp_name, mask_cst));
2852 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
2853 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
2855 /* Make and_tmp the left operand of the conditional test against zero.
2856 if and_tmp has a non-zero bit then some address is unaligned. */
2857 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
/* Return (and_tmp == 0): true iff every checked address is aligned.  */
2858 return build2 (EQ_EXPR, boolean_type_node,
2859 and_tmp_name, ptrsize_zero);
2863 /* Function vect_transform_loop.
2865 The analysis phase has determined that the loop is vectorizable.
2866 Vectorize the loop - created vectorized stmts to replace the scalar
2867 stmts in the loop, and update the loop exit condition. */
2870 vect_transform_loop (loop_vec_info loop_vinfo,
2871 struct loops *loops ATTRIBUTE_UNUSED)
2873 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2874 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2875 int nbbs = loop->num_nodes;
2876 block_stmt_iterator si;
2879 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2883 if (vect_print_dump_info (REPORT_DETAILS))
2884 fprintf (vect_dump, "=== vec_transform_loop ===");
2886 /* If the loop has data references that may or may not be aligned then
2887 two versions of the loop need to be generated, one which is vectorized
2888 and one which isn't. A test is then generated to control which of the
2889 loops is executed. The test checks for the alignment of all of the
2890 data references that may or may not be aligned. */
2892 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
2896 tree cond_expr_stmt_list = NULL_TREE;
2897 basic_block condition_bb;
2898 block_stmt_iterator cond_exp_bsi;
2900 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
2901 &cond_expr_stmt_list);
2902 initialize_original_copy_tables ();
2903 nloop = loop_version (loops, loop, cond_expr, &condition_bb, true);
2904 free_original_copy_tables();
2905 update_ssa (TODO_update_ssa);
2906 cond_exp_bsi = bsi_last (condition_bb);
2907 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
2910 /* CHECKME: we wouldn't need this if we called update_ssa once
2912 bitmap_zero (vect_vnames_to_rename);
2914 /* Peel the loop if there are data refs with unknown alignment.
2915 Only one data ref with unknown store is allowed. */
2917 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
2918 vect_do_peeling_for_alignment (loop_vinfo, loops);
2920 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
2921 compile time constant), or it is a constant that doesn't divide by the
2922 vectorization factor, then an epilog loop needs to be created.
2923 We therefore duplicate the loop: the original loop will be vectorized,
2924 and will compute the first (n/VF) iterations. The second copy of the loop
2925 will remain scalar and will compute the remaining (n%VF) iterations.
2926 (VF is the vectorization factor). */
2928 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2929 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2930 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
2931 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
2933 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
2934 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
2936 /* 1) Make sure the loop header has exactly two entries
2937 2) Make sure we have a preheader basic block. */
2939 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
2941 loop_split_edge_with (loop_preheader_edge (loop), NULL);
2944 /* FORNOW: the vectorizer supports only loops which body consist
2945 of one basic block (header + empty latch). When the vectorizer will
2946 support more involved loop forms, the order by which the BBs are
2947 traversed need to be reconsidered. */
2949 for (i = 0; i < nbbs; i++)
2951 basic_block bb = bbs[i];
2953 for (si = bsi_start (bb); !bsi_end_p (si);)
2955 tree stmt = bsi_stmt (si);
2956 stmt_vec_info stmt_info;
2959 if (vect_print_dump_info (REPORT_DETAILS))
2961 fprintf (vect_dump, "------>vectorizing statement: ");
2962 print_generic_expr (vect_dump, stmt, TDF_SLIM);
2964 stmt_info = vinfo_for_stmt (stmt);
2965 gcc_assert (stmt_info);
2966 if (!STMT_VINFO_RELEVANT_P (stmt_info)
2967 && !STMT_VINFO_LIVE_P (stmt_info))
2972 /* FORNOW: Verify that all stmts operate on the same number of
2973 units and no inner unrolling is necessary. */
2975 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
2976 == (unsigned HOST_WIDE_INT) vectorization_factor);
2978 /* -------- vectorize statement ------------ */
2979 if (vect_print_dump_info (REPORT_DETAILS))
2980 fprintf (vect_dump, "transform statement.");
2982 is_store = vect_transform_stmt (stmt, &si);
2985 /* Free the attached stmt_vec_info and remove the stmt. */
2986 stmt_ann_t ann = stmt_ann (stmt);
2988 set_stmt_info ((tree_ann_t)ann, NULL);
2997 slpeel_make_loop_iterate_ntimes (loop, ratio);
2999 EXECUTE_IF_SET_IN_BITMAP (vect_vnames_to_rename, 0, j, bi)
3000 mark_sym_for_renaming (SSA_NAME_VAR (ssa_name (j)));
3002 /* The memory tags and pointers in vectorized statements need to
3003 have their SSA forms updated. FIXME, why can't this be delayed
3004 until all the loops have been transformed? */
3005 update_ssa (TODO_update_ssa);
3007 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
3008 fprintf (vect_dump, "LOOP VECTORIZED.");