1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to the Free
19 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
24 #include "coretypes.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
40 #include "tree-data-ref.h"
41 #include "tree-chrec.h"
42 #include "tree-scalar-evolution.h"
43 #include "tree-vectorizer.h"
44 #include "langhooks.h"
45 #include "tree-pass.h"
49 /* Utility functions for the code transformation. */
50 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *);
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53 (tree, block_stmt_iterator *, tree, tree *, tree *, bool, tree);
54 static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
55 static tree vect_setup_realignment (tree, block_stmt_iterator *, tree *);
56 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
57 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
58 static tree vect_init_vector (tree, tree, tree);
59 static void vect_finish_stmt_generation
60 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
61 static bool vect_is_simple_cond (tree, loop_vec_info);
62 static void update_vuses_to_preheader (tree, struct loop*);
63 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
64 static tree get_initial_def_for_reduction (tree, tree, tree *);
66 /* Utility function dealing with loop peeling (not peeling itself). */
67 static void vect_generate_tmps_on_preheader
68 (loop_vec_info, tree *, tree *, tree *);
69 static tree vect_build_loop_niters (loop_vec_info);
70 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
71 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
72 static void vect_update_init_of_dr (struct data_reference *, tree niters);
73 static void vect_update_inits_of_drs (loop_vec_info, tree);
74 static int vect_min_worthwhile_factor (enum tree_code);
77 /* Function vect_estimate_min_profitable_iters
79 Return the number of iterations required for the vector version of the
80 loop to be profitable relative to the cost of the scalar version of the
83 TODO: Take profile info into account before making vectorization
84 decisions, if available. */
87 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
90 int min_profitable_iters;
91 int peel_iters_prologue;
92 int peel_iters_epilogue;
93 int vec_inside_cost = 0;
94 int vec_outside_cost = 0;
95 int scalar_single_iter_cost = 0;
96 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
97 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
98 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
99 int nbbs = loop->num_nodes;
102 /* Cost model disabled. */
103 if (!flag_vect_cost_model)
105 if (vect_print_dump_info (REPORT_DETAILS))
106 fprintf (vect_dump, "cost model disabled.");
110 /* Requires loop versioning tests to handle misalignment.
111 FIXME: Make cost depend on number of stmts in may_misalign list. */
113 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
115 vec_outside_cost += TARG_COND_BRANCH_COST;
116 if (vect_print_dump_info (REPORT_DETAILS))
117 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
121 /* Count statements in scalar loop. Using this as scalar cost for a single
124 TODO: Add outer loop support.
126 TODO: Consider assigning different costs to different scalar
129 for (i = 0; i < nbbs; i++)
131 block_stmt_iterator si;
132 basic_block bb = bbs[i];
134 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
136 tree stmt = bsi_stmt (si);
137 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
138 if (!STMT_VINFO_RELEVANT_P (stmt_info)
139 && !STMT_VINFO_LIVE_P (stmt_info))
141 scalar_single_iter_cost++;
142 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info);
143 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
147 /* Add additional cost for the peeled instructions in prologue and epilogue
150 FORNOW: If we dont know the value of peel_iters for prologue or epilogue
151 at compile-time - we assume the worst.
153 TODO: Build an expression that represents peel_iters for prologue and
154 epilogue to be used in a run-time test. */
156 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
158 if (byte_misalign < 0)
160 peel_iters_prologue = vf - 1;
161 if (vect_print_dump_info (REPORT_DETAILS))
162 fprintf (vect_dump, "cost model: "
163 "prologue peel iters set conservatively.");
165 /* If peeling for alignment is unknown, loop bound of main loop becomes
167 peel_iters_epilogue = vf - 1;
168 if (vect_print_dump_info (REPORT_DETAILS))
169 fprintf (vect_dump, "cost model: "
170 "epilogue peel iters set conservatively because "
171 "peeling for alignment is unknown .");
177 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
178 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
179 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
180 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
182 peel_iters_prologue = nelements - (byte_misalign / element_size);
185 peel_iters_prologue = 0;
187 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
189 peel_iters_epilogue = vf - 1;
190 if (vect_print_dump_info (REPORT_DETAILS))
191 fprintf (vect_dump, "cost model: "
192 "epilogue peel iters set conservatively because "
193 "loop iterations are unknown .");
197 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
198 peel_iters_prologue = niters < peel_iters_prologue ?
199 niters : peel_iters_prologue;
200 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
204 /* Requires a prologue loop when peeling to handle misalignment. Add cost of
205 two guards, one for the peeled loop and one for the vector loop. */
207 if (peel_iters_prologue)
209 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
210 if (vect_print_dump_info (REPORT_DETAILS))
211 fprintf (vect_dump, "cost model: Adding cost of checks for "
215 /* Requires an epilogue loop to finish up remaining iterations after vector
216 loop. Add cost of two guards, one for the peeled loop and one for the
219 if (peel_iters_epilogue
220 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
221 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
223 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
224 if (vect_print_dump_info (REPORT_DETAILS))
225 fprintf (vect_dump, "cost model : Adding cost of checks for "
229 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
230 + (peel_iters_epilogue * scalar_single_iter_cost);
232 /* Calculate number of iterations required to make the vector version
233 profitable, relative to the loop bodies only. The following condition
234 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
235 SIC = scalar iteration cost, VIC = vector iteration cost,
236 VOC = vector outside cost and VF = vectorization factor. */
238 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
240 if (vec_outside_cost == 0)
241 min_profitable_iters = 1;
244 min_profitable_iters = (vec_outside_cost * vf)
245 / ((scalar_single_iter_cost * vf)
248 if ((scalar_single_iter_cost * vf * min_profitable_iters)
249 <= ((vec_inside_cost * min_profitable_iters)
250 + (vec_outside_cost * vf)))
251 min_profitable_iters++;
254 /* vector version will never be profitable. */
257 if (vect_print_dump_info (REPORT_DETAILS))
258 fprintf (vect_dump, "cost model: vector iteration cost = %d "
259 "is divisible by scalar iteration cost = %d by a factor "
260 "greater than or equal to the vectorization factor = %d .",
261 vec_inside_cost, scalar_single_iter_cost, vf);
265 if (vect_print_dump_info (REPORT_DETAILS))
267 fprintf (vect_dump, "Cost model analysis: \n");
268 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
270 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
272 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
273 fprintf (vect_dump, " prologue iterations: %d\n",
274 peel_iters_prologue);
275 fprintf (vect_dump, " epilogue iterations: %d\n",
276 peel_iters_epilogue);
277 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
278 min_profitable_iters);
279 fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
280 min_profitable_iters < vf ? vf : min_profitable_iters);
283 return min_profitable_iters < vf ? vf : min_profitable_iters;
287 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
288 functions. Design better to avoid maintenance issues. */
290 /* Function vect_model_reduction_cost.
292 Models cost for a reduction operation, including the vector ops
293 generated within the strip-mine loop, the initial definition before
294 the loop, and the epilogue code that must be generated. */
297 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
306 enum machine_mode mode;
307 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
308 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
310 /* Cost of reduction op inside loop. */
311 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
313 reduction_op = TREE_OPERAND (operation, op_type-1);
314 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
315 mode = TYPE_MODE (vectype);
316 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
319 orig_stmt = STMT_VINFO_STMT (stmt_info);
321 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
323 /* Add in cost for initial definition. */
324 outer_cost += TARG_VEC_STMT_COST;
326 /* Determine cost of epilogue code.
328 We have a reduction operator that will reduce the vector in one statement.
329 Also requires scalar extract. */
331 if (reduc_code < NUM_TREE_CODES)
332 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
335 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
337 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
338 int element_bitsize = tree_low_cst (bitsize, 1);
339 int nelements = vec_size_in_bits / element_bitsize;
341 optab = optab_for_tree_code (code, vectype);
343 /* We have a whole vector shift available. */
344 if (!VECTOR_MODE_P (mode)
345 || optab->handlers[mode].insn_code == CODE_FOR_nothing)
346 /* Final reduction via vector shifts and the reduction operator. Also
347 requires scalar extract. */
348 outer_cost += ((exact_log2(nelements) * 2 + 1) * TARG_VEC_STMT_COST);
350 /* Use extracts and reduction op for final reduction. For N elements,
351 we have N extracts and N-1 reduction ops. */
352 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
355 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
357 if (vect_print_dump_info (REPORT_DETAILS))
358 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
359 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
360 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
364 /* Function vect_model_induction_cost.
366 Models cost for induction operations. */
369 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
371 /* loop cost for vec_loop. */
372 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
373 /* prologue cost for vec_init and vec_step. */
374 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
376 if (vect_print_dump_info (REPORT_DETAILS))
377 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
378 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
379 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
383 /* Function vect_model_simple_cost.
385 Models cost for simple operations, i.e. those that only emit ncopies of a
386 single op. Right now, this does not account for multiple insns that could
387 be generated for the single vector op. We will handle that shortly. */
390 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type *dt)
394 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
396 /* FORNOW: Assuming maximum 2 args per stmts. */
399 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
400 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) += TARG_SCALAR_TO_VEC_COST;
403 if (vect_print_dump_info (REPORT_DETAILS))
404 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
405 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
406 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
410 /* Function vect_cost_strided_group_size
412 For strided load or store, return the group_size only if it is the first
413 load or store of a group, else return 1. This ensures that group size is
414 only returned once per group. */
417 vect_cost_strided_group_size (stmt_vec_info stmt_info)
419 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
421 if (first_stmt == STMT_VINFO_STMT (stmt_info))
422 return DR_GROUP_SIZE (stmt_info);
428 /* Function vect_model_store_cost
430 Models cost for stores. In the case of strided accesses, one access
431 has the overhead of the strided access attributed to it. */
434 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies, enum vect_def_type dt)
439 if (dt == vect_constant_def || dt == vect_invariant_def)
440 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = TARG_SCALAR_TO_VEC_COST;
442 /* Strided access? */
443 if (DR_GROUP_FIRST_DR (stmt_info))
444 group_size = vect_cost_strided_group_size (stmt_info);
445 /* Not a strided access. */
449 /* Is this an access in a group of stores, which provide strided access?
450 If so, add in the cost of the permutes. */
453 /* Uses a high and low interleave operation for each needed permute. */
454 cost = ncopies * exact_log2(group_size) * group_size
455 * TARG_VEC_STMT_COST;
457 if (vect_print_dump_info (REPORT_DETAILS))
458 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
463 /* Costs of the stores. */
464 cost += ncopies * TARG_VEC_STORE_COST;
466 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = cost;
468 if (vect_print_dump_info (REPORT_DETAILS))
469 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
470 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
471 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
475 /* Function vect_model_load_cost
477 Models cost for loads. In the case of strided accesses, the last access
478 has the overhead of the strided access attributed to it. Since unaligned
479 accesses are supported for loads, we also account for the costs of the
480 access scheme chosen. */
483 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies)
488 int alignment_support_cheme;
490 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
492 /* Strided accesses? */
493 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
496 group_size = vect_cost_strided_group_size (stmt_info);
497 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
499 /* Not a strided access. */
506 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
508 /* Is this an access in a group of loads providing strided access?
509 If so, add in the cost of the permutes. */
512 /* Uses an even and odd extract operations for each needed permute. */
513 inner_cost = ncopies * exact_log2(group_size) * group_size
514 * TARG_VEC_STMT_COST;
516 if (vect_print_dump_info (REPORT_DETAILS))
517 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
522 /* The loads themselves. */
523 switch (alignment_support_cheme)
527 inner_cost += ncopies * TARG_VEC_LOAD_COST;
529 if (vect_print_dump_info (REPORT_DETAILS))
530 fprintf (vect_dump, "vect_model_load_cost: aligned.");
534 case dr_unaligned_supported:
536 /* Here, we assign an additional cost for the unaligned load. */
537 inner_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
539 if (vect_print_dump_info (REPORT_DETAILS))
540 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
545 case dr_unaligned_software_pipeline:
549 if (vect_print_dump_info (REPORT_DETAILS))
550 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
553 /* Unaligned software pipeline has a load of an address, an initial
554 load, and possibly a mask operation to "prime" the loop. However,
555 if this is an access in a group of loads, which provide strided
556 access, then the above cost should only be considered for one
557 access in the group. Inside the loop, there is a load op
558 and a realignment op. */
560 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1)
562 outer_cost = 2*TARG_VEC_STMT_COST;
563 if (targetm.vectorize.builtin_mask_for_load)
564 outer_cost += TARG_VEC_STMT_COST;
567 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
569 inner_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
578 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = inner_cost;
580 if (vect_print_dump_info (REPORT_DETAILS))
581 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
582 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
583 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
588 /* Function vect_get_new_vect_var.
590 Returns a name for a new variable. The current naming scheme appends the
591 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
592 the name of vectorizer generated variables, and appends that to NAME if
596 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
603 case vect_simple_var:
606 case vect_scalar_var:
609 case vect_pointer_var:
617 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
619 new_vect_var = create_tmp_var (type, prefix);
621 /* Mark vector typed variable as a gimple register variable. */
622 if (TREE_CODE (type) == VECTOR_TYPE)
623 DECL_GIMPLE_REG_P (new_vect_var) = true;
629 /* Function vect_create_addr_base_for_vector_ref.
631 Create an expression that computes the address of the first memory location
632 that will be accessed for a data reference.
635 STMT: The statement containing the data reference.
636 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
637 OFFSET: Optional. If supplied, it is be added to the initial address.
640 1. Return an SSA_NAME whose value is the address of the memory location of
641 the first vector of the data reference.
642 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
643 these statement(s) which define the returned SSA_NAME.
645 FORNOW: We are only handling array accesses with step 1. */
648 vect_create_addr_base_for_vector_ref (tree stmt,
652 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
653 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
654 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
655 tree base_name = build_fold_indirect_ref (data_ref_base);
657 tree addr_base, addr_expr;
659 tree base_offset = unshare_expr (DR_OFFSET (dr));
660 tree init = unshare_expr (DR_INIT (dr));
661 tree vect_ptr_type, addr_expr2;
663 /* Create base_offset */
664 base_offset = size_binop (PLUS_EXPR, base_offset, init);
665 base_offset = fold_convert (sizetype, base_offset);
666 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
667 add_referenced_var (dest);
668 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
669 append_to_statement_list_force (new_stmt, new_stmt_list);
673 tree tmp = create_tmp_var (sizetype, "offset");
676 /* For interleaved access step we divide STEP by the size of the
677 interleaving group. */
678 if (DR_GROUP_SIZE (stmt_info))
679 step = fold_build2 (TRUNC_DIV_EXPR, TREE_TYPE (offset), DR_STEP (dr),
680 build_int_cst (TREE_TYPE (offset),
681 DR_GROUP_SIZE (stmt_info)));
685 add_referenced_var (tmp);
686 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
687 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
688 base_offset, offset);
689 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
690 append_to_statement_list_force (new_stmt, new_stmt_list);
693 /* base + base_offset */
694 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
697 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
699 /* addr_expr = addr_base */
700 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
701 get_name (base_name));
702 add_referenced_var (addr_expr);
703 vec_stmt = fold_convert (vect_ptr_type, addr_base);
704 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
705 get_name (base_name));
706 add_referenced_var (addr_expr2);
707 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
708 append_to_statement_list_force (new_stmt, new_stmt_list);
710 if (vect_print_dump_info (REPORT_DETAILS))
712 fprintf (vect_dump, "created ");
713 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
719 /* Function vect_create_data_ref_ptr.
721 Create a new pointer to vector type (vp), that points to the first location
722 accessed in the loop by STMT, along with the def-use update chain to
723 appropriately advance the pointer through the loop iterations. Also set
724 aliasing information for the pointer. This vector pointer is used by the
725 callers to this function to create a memory reference expression for vector
729 1. STMT: a stmt that references memory. Expected to be of the form
730 GIMPLE_MODIFY_STMT <name, data-ref> or
731 GIMPLE_MODIFY_STMT <data-ref, name>.
732 2. BSI: block_stmt_iterator where new stmts can be added.
733 3. OFFSET (optional): an offset to be added to the initial address accessed
734 by the data-ref in STMT.
735 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
736 pointing to the initial address.
737 5. TYPE: if not NULL indicates the required type of the data-ref
740 1. Declare a new ptr to vector_type, and have it point to the base of the
741 data reference (initial addressed accessed by the data reference).
742 For example, for vector of type V8HI, the following code is generated:
745 vp = (v8hi *)initial_address;
747 if OFFSET is not supplied:
748 initial_address = &a[init];
749 if OFFSET is supplied:
750 initial_address = &a[init + OFFSET];
752 Return the initial_address in INITIAL_ADDRESS.
754 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
755 update the pointer in each iteration of the loop.
757 Return the increment stmt that updates the pointer in PTR_INCR.
759 3. Return the pointer. */
762 vect_create_data_ref_ptr (tree stmt,
763 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
764 tree offset, tree *initial_address, tree *ptr_incr,
765 bool only_init, tree type)
768 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
769 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
770 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
771 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
777 tree new_stmt_list = NULL_TREE;
778 edge pe = loop_preheader_edge (loop);
781 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
783 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
785 if (vect_print_dump_info (REPORT_DETAILS))
787 tree data_ref_base = base_name;
788 fprintf (vect_dump, "create vector-pointer variable to type: ");
789 print_generic_expr (vect_dump, vectype, TDF_SLIM);
790 if (TREE_CODE (data_ref_base) == VAR_DECL)
791 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
792 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
793 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
794 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
795 fprintf (vect_dump, " vectorizing a record based array ref: ");
796 else if (TREE_CODE (data_ref_base) == SSA_NAME)
797 fprintf (vect_dump, " vectorizing a pointer ref: ");
798 print_generic_expr (vect_dump, base_name, TDF_SLIM);
801 /** (1) Create the new vector-pointer variable: **/
803 vect_ptr_type = build_pointer_type (type);
805 vect_ptr_type = build_pointer_type (vectype);
806 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
807 get_name (base_name));
808 add_referenced_var (vect_ptr);
810 /** (2) Add aliasing information to the new vector-pointer:
811 (The points-to info (DR_PTR_INFO) may be defined later.) **/
813 tag = DR_SYMBOL_TAG (dr);
816 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
817 tag must be created with tag added to its may alias list. */
819 new_type_alias (vect_ptr, tag, DR_REF (dr));
821 set_symbol_mem_tag (vect_ptr, tag);
823 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
825 /** (3) Calculate the initial address the vector-pointer, and set
826 the vector-pointer to point to it before the loop: **/
828 /* Create: (&(base[init_val+offset]) in the loop preheader. */
829 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
831 pe = loop_preheader_edge (loop);
832 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
833 gcc_assert (!new_bb);
834 *initial_address = new_temp;
836 /* Create: p = (vectype *) initial_base */
837 vec_stmt = fold_convert (vect_ptr_type, new_temp);
838 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
839 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
840 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
841 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
842 gcc_assert (!new_bb);
845 /** (4) Handle the updating of the vector-pointer inside the loop: **/
847 if (only_init) /* No update in loop is required. */
849 /* Copy the points-to information if it exists. */
850 if (DR_PTR_INFO (dr))
851 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
852 return vect_ptr_init;
856 block_stmt_iterator incr_bsi;
858 tree indx_before_incr, indx_after_incr;
861 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
862 create_iv (vect_ptr_init,
863 fold_convert (vect_ptr_type, TYPE_SIZE_UNIT (vectype)),
864 NULL_TREE, loop, &incr_bsi, insert_after,
865 &indx_before_incr, &indx_after_incr);
866 incr = bsi_stmt (incr_bsi);
867 set_stmt_info (stmt_ann (incr),
868 new_stmt_vec_info (incr, loop_vinfo));
870 /* Copy the points-to information if it exists. */
871 if (DR_PTR_INFO (dr))
873 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
874 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
876 merge_alias_info (vect_ptr_init, indx_before_incr);
877 merge_alias_info (vect_ptr_init, indx_after_incr);
881 return indx_before_incr;
886 /* Function bump_vector_ptr
888 Increment a pointer (to a vector type) by vector-size. Connect the new
889 increment stmt to the existing def-use update-chain of the pointer.
891 The pointer def-use update-chain before this function:
892 DATAREF_PTR = phi (p_0, p_2)
894 PTR_INCR: p_2 = DATAREF_PTR + step
896 The pointer def-use update-chain after this function:
897 DATAREF_PTR = phi (p_0, p_2)
899 NEW_DATAREF_PTR = DATAREF_PTR + vector_size
901 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
904 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
906 PTR_INCR - the stmt that updates the pointer in each iteration of the loop.
907 The increment amount across iterations is also expected to be
909 BSI - location where the new update stmt is to be placed.
910 STMT - the original scalar memory-access stmt that is being vectorized.
912 Output: Return NEW_DATAREF_PTR as illustrated above.
917 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
920 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
921 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
922 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
923 tree vptr_type = TREE_TYPE (dataref_ptr);
924 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
925 tree update = TYPE_SIZE_UNIT (vectype);
929 tree new_dataref_ptr;
931 incr_stmt = build_gimple_modify_stmt (ptr_var,
932 build2 (POINTER_PLUS_EXPR, vptr_type,
933 dataref_ptr, update));
934 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
935 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
936 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
938 /* Update the vector-pointer's cross-iteration increment. */
939 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
941 tree use = USE_FROM_PTR (use_p);
943 if (use == dataref_ptr)
944 SET_USE (use_p, new_dataref_ptr);
946 gcc_assert (tree_int_cst_compare (use, update) == 0);
949 /* Copy the points-to information if it exists. */
950 if (DR_PTR_INFO (dr))
951 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
952 merge_alias_info (new_dataref_ptr, dataref_ptr);
954 return new_dataref_ptr;
958 /* Function vect_create_destination_var.
960 Create a new temporary of type VECTYPE. */
963 vect_create_destination_var (tree scalar_dest, tree vectype)
966 const char *new_name;
968 enum vect_var_kind kind;
970 kind = vectype ? vect_simple_var : vect_scalar_var;
971 type = vectype ? vectype : TREE_TYPE (scalar_dest);
973 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
975 new_name = get_name (scalar_dest);
978 vec_dest = vect_get_new_vect_var (type, kind, new_name);
979 add_referenced_var (vec_dest);
985 /* Function vect_init_vector.
987 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
988 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
989 used in the vectorization of STMT. */
992 vect_init_vector (tree stmt, tree vector_var, tree vector_type)
994 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
995 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
996 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1004 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1005 add_referenced_var (new_var);
1007 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1008 new_temp = make_ssa_name (new_var, init_stmt);
1009 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
1011 pe = loop_preheader_edge (loop);
1012 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1013 gcc_assert (!new_bb);
1015 if (vect_print_dump_info (REPORT_DETAILS))
1017 fprintf (vect_dump, "created new init_stmt: ");
1018 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1021 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1026 /* Function get_initial_def_for_induction
1029 IV_PHI - the initial value of the induction variable
1032 Return a vector variable, initialized with the first VF values of
1033 the induction variable. E.g., for an iv with IV_PHI='X' and
1034 evolution S, for a vector of 4 units, we want to return:
1035 [X, X + S, X + 2*S, X + 3*S]. */
1038 get_initial_def_for_induction (tree iv_phi)
1040 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1041 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1042 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1043 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1044 tree vectype = get_vectype_for_scalar_type (scalar_type);
1045 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1046 edge pe = loop_preheader_edge (loop);
1048 block_stmt_iterator bsi;
1049 tree vec, vec_init, vec_step, t;
1054 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1055 tree init_expr, step_expr;
1056 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1059 int ncopies = vf / nunits;
1061 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1063 tree stmt = NULL_TREE;
1064 block_stmt_iterator si;
1065 basic_block bb = bb_for_stmt (iv_phi);
1067 gcc_assert (phi_info);
1068 gcc_assert (ncopies >= 1);
1070 /* Find the first insertion point in the BB. */
1071 si = bsi_after_labels (bb);
1072 stmt = bsi_stmt (si);
1074 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (iv_phi));
1075 gcc_assert (access_fn);
1076 ok = vect_is_simple_iv_evolution (loop->num, access_fn,
1077 &init_expr, &step_expr);
1080 /* Create the vector that holds the initial_value of the induction. */
1081 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1082 add_referenced_var (new_var);
1084 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1087 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1088 gcc_assert (!new_bb);
1092 t = tree_cons (NULL_TREE, new_name, t);
1093 for (i = 1; i < nunits; i++)
1097 /* Create: new_name = new_name + step_expr */
1098 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1099 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1100 new_name = make_ssa_name (new_var, init_stmt);
1101 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1103 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1104 gcc_assert (!new_bb);
1106 if (vect_print_dump_info (REPORT_DETAILS))
1108 fprintf (vect_dump, "created new init_stmt: ");
1109 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1111 t = tree_cons (NULL_TREE, new_name, t);
1113 vec = build_constructor_from_list (vectype, nreverse (t));
1114 vec_init = vect_init_vector (stmt, vec, vectype);
1117 /* Create the vector that holds the step of the induction. */
1118 expr = build_int_cst (scalar_type, vf);
1119 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1121 for (i = 0; i < nunits; i++)
1122 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1123 vec = build_constructor_from_list (vectype, t);
1124 vec_step = vect_init_vector (stmt, vec, vectype);
1127 /* Create the following def-use cycle:
1129 vec_init = [X, X+S, X+2*S, X+3*S]
1130 vec_step = [VF*S, VF*S, VF*S, VF*S]
1132 vec_iv = PHI <vec_init, vec_loop>
1136 vec_loop = vec_iv + vec_step; */
1138 /* Create the induction-phi that defines the induction-operand. */
1139 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1140 add_referenced_var (vec_dest);
1141 induction_phi = create_phi_node (vec_dest, loop->header);
1142 set_stmt_info (get_stmt_ann (induction_phi),
1143 new_stmt_vec_info (induction_phi, loop_vinfo));
1144 induc_def = PHI_RESULT (induction_phi);
1146 /* Create the iv update inside the loop */
1147 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1148 build2 (PLUS_EXPR, vectype,
1149 induc_def, vec_step));
1150 vec_def = make_ssa_name (vec_dest, new_stmt);
1151 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1152 bsi = bsi_for_stmt (stmt);
1153 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1155 /* Set the arguments of the phi node: */
1156 add_phi_arg (induction_phi, vec_init, loop_preheader_edge (loop));
1157 add_phi_arg (induction_phi, vec_def, loop_latch_edge (loop));
1160 /* In case the vectorization factor (VF) is bigger than the number
1161 of elements that we can fit in a vectype (nunits), we have to generate
1162 more than one vector stmt - i.e - we need to "unroll" the
1163 vector stmt by a factor VF/nunits. For more details see documentation
1164 in vectorizable_operation. */
1168 stmt_vec_info prev_stmt_vinfo;
1170 /* Create the vector that holds the step of the induction. */
1171 expr = build_int_cst (scalar_type, nunits);
1172 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1174 for (i = 0; i < nunits; i++)
1175 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1176 vec = build_constructor_from_list (vectype, t);
1177 vec_step = vect_init_vector (stmt, vec, vectype);
1179 vec_def = induc_def;
1180 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1181 for (i = 1; i < ncopies; i++)
1185 /* vec_i = vec_prev + vec_{step*nunits} */
1186 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1187 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1188 vec_def = make_ssa_name (vec_dest, new_stmt);
1189 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1190 bsi = bsi_for_stmt (stmt);
1191 vect_finish_stmt_generation (stmt, new_stmt, &bsi);
1193 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1194 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1198 if (vect_print_dump_info (REPORT_DETAILS))
1200 fprintf (vect_dump, "transform induction: created def-use cycle:");
1201 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1202 fprintf (vect_dump, "\n");
1203 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
1206 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1211 /* Function vect_get_vec_def_for_operand.
1213 OP is an operand in STMT. This function returns a (vector) def that will be
1214 used in the vectorized stmt for STMT.
1216 In the case that OP is an SSA_NAME which is defined in the loop, then
1217 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1219 In case OP is an invariant or constant, a new stmt that creates a vector def
1220 needs to be introduced. */
1223 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1228 stmt_vec_info def_stmt_info = NULL;
1229 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1230 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1231 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1232 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1233 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1239 enum vect_def_type dt;
1243 if (vect_print_dump_info (REPORT_DETAILS))
1245 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1246 print_generic_expr (vect_dump, op, TDF_SLIM);
1249 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1250 gcc_assert (is_simple_use);
1251 if (vect_print_dump_info (REPORT_DETAILS))
1255 fprintf (vect_dump, "def = ");
1256 print_generic_expr (vect_dump, def, TDF_SLIM);
1260 fprintf (vect_dump, " def_stmt = ");
1261 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1267 /* Case 1: operand is a constant. */
1268 case vect_constant_def:
1273 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1274 if (vect_print_dump_info (REPORT_DETAILS))
1275 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1277 for (i = nunits - 1; i >= 0; --i)
1279 t = tree_cons (NULL_TREE, op, t);
1281 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1282 vec_cst = build_vector (vector_type, t);
1284 return vect_init_vector (stmt, vec_cst, vector_type);
1287 /* Case 2: operand is defined outside the loop - loop invariant. */
1288 case vect_invariant_def:
1293 /* Create 'vec_inv = {inv,inv,..,inv}' */
1294 if (vect_print_dump_info (REPORT_DETAILS))
1295 fprintf (vect_dump, "Create vector_inv.");
1297 for (i = nunits - 1; i >= 0; --i)
1299 t = tree_cons (NULL_TREE, def, t);
1302 /* FIXME: use build_constructor directly. */
1303 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1304 vec_inv = build_constructor_from_list (vector_type, t);
1306 return vect_init_vector (stmt, vec_inv, vector_type);
1309 /* Case 3: operand is defined inside the loop. */
1313 *scalar_def = def_stmt;
1315 /* Get the def from the vectorized stmt. */
1316 def_stmt_info = vinfo_for_stmt (def_stmt);
1317 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1318 gcc_assert (vec_stmt);
1319 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1323 /* Case 4: operand is defined by a loop header phi - reduction */
1324 case vect_reduction_def:
1326 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1328 /* Get the def before the loop */
1329 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1330 return get_initial_def_for_reduction (stmt, op, scalar_def);
1333 /* Case 5: operand is defined by loop-header phi - induction. */
1334 case vect_induction_def:
1336 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1338 /* Get the def before the loop */
1339 return get_initial_def_for_induction (def_stmt);
1348 /* Function vect_get_vec_def_for_stmt_copy
1350 Return a vector-def for an operand. This function is used when the
1351 vectorized stmt to be created (by the caller to this function) is a "copy"
1352 created in case the vectorized result cannot fit in one vector, and several
1353 copies of the vector-stmt are required. In this case the vector-def is
1354 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1355 of the stmt that defines VEC_OPRND.
1356 DT is the type of the vector def VEC_OPRND.
1359 In case the vectorization factor (VF) is bigger than the number
1360 of elements that can fit in a vectype (nunits), we have to generate
1361 more than one vector stmt to vectorize the scalar stmt. This situation
1362 arises when there are multiple data-types operated upon in the loop; the
1363 smallest data-type determines the VF, and as a result, when vectorizing
1364 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1365 vector stmt (each computing a vector of 'nunits' results, and together
1366 computing 'VF' results in each iteration). This function is called when
1367 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1368 which VF=16 and nunits=4, so the number of copies required is 4):
1370 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1372 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1373 VS1.1: vx.1 = memref1 VS1.2
1374 VS1.2: vx.2 = memref2 VS1.3
1375 VS1.3: vx.3 = memref3
1377 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1378 VSnew.1: vz1 = vx.1 + ... VSnew.2
1379 VSnew.2: vz2 = vx.2 + ... VSnew.3
1380 VSnew.3: vz3 = vx.3 + ...
1382 The vectorization of S1 is explained in vectorizable_load.
1383 The vectorization of S2:
1384 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1385 the function 'vect_get_vec_def_for_operand' is called to
1386 get the relevant vector-def for each operand of S2. For operand x it
1387 returns the vector-def 'vx.0'.
1389 To create the remaining copies of the vector-stmt (VSnew.j), this
1390 function is called to get the relevant vector-def for each operand. It is
1391 obtained from the respective VS1.j stmt, which is recorded in the
1392 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1394 For example, to obtain the vector-def 'vx.1' in order to create the
1395 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1396 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1397 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1398 and return its def ('vx.1').
1399 Overall, to create the above sequence this function will be called 3 times:
1400 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1401 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1402 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1405 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1407 tree vec_stmt_for_operand;
1408 stmt_vec_info def_stmt_info;
1410 /* Do nothing; can reuse same def. */
1411 if (dt == vect_invariant_def || dt == vect_constant_def )
1414 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1415 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1416 gcc_assert (def_stmt_info);
1417 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1418 gcc_assert (vec_stmt_for_operand);
1419 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1425 /* Function vect_finish_stmt_generation.
1427 Insert a new stmt. */
1430 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1431 block_stmt_iterator *bsi)
1433 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1434 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1436 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1437 set_stmt_info (get_stmt_ann (vec_stmt),
1438 new_stmt_vec_info (vec_stmt, loop_vinfo));
1440 if (vect_print_dump_info (REPORT_DETAILS))
1442 fprintf (vect_dump, "add new stmt: ");
1443 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
1446 /* Make sure bsi points to the stmt that is being vectorized. */
1447 gcc_assert (stmt == bsi_stmt (*bsi));
1449 #ifdef USE_MAPPED_LOCATION
1450 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
1452 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
1457 /* Function get_initial_def_for_reduction
1460 STMT - a stmt that performs a reduction operation in the loop.
1461 INIT_VAL - the initial value of the reduction variable
1464 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
1465 of the reduction (used for adjusting the epilog - see below).
1466 Return a vector variable, initialized according to the operation that STMT
1467 performs. This vector will be used as the initial value of the
1468 vector of partial results.
1470 Option1 (adjust in epilog): Initialize the vector as follows:
1473 min/max: [init_val,init_val,..,init_val,init_val]
1474 bit and/or: [init_val,init_val,..,init_val,init_val]
1475 and when necessary (e.g. add/mult case) let the caller know
1476 that it needs to adjust the result by init_val.
1478 Option2: Initialize the vector as follows:
1479 add: [0,0,...,0,init_val]
1480 mult: [1,1,...,1,init_val]
1481 min/max: [init_val,init_val,...,init_val]
1482 bit and/or: [init_val,init_val,...,init_val]
1483 and no adjustments are needed.
1485 For example, for the following code:
1491 STMT is 's = s + a[i]', and the reduction variable is 's'.
1492 For a vector of 4 units, we want to return either [0,0,0,init_val],
1493 or [0,0,0,0] and let the caller know that it needs to adjust
1494 the result at the end by 'init_val'.
1496 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
1497 initialization vector is simpler (same element in all entries).
1498 A cost model should help decide between these two schemes. */
1501 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
1503 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1504 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1505 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1506 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1507 tree type = TREE_TYPE (init_val);
1515 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
1516 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
1520 case WIDEN_SUM_EXPR:
1523 *adjustment_def = init_val;
1524 /* Create a vector of zeros for init_def. */
1525 if (INTEGRAL_TYPE_P (type))
1526 def_for_init = build_int_cst (type, 0);
1528 def_for_init = build_real (type, dconst0);
1529 for (i = nunits - 1; i >= 0; --i)
1530 t = tree_cons (NULL_TREE, def_for_init, t);
1531 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
1532 init_def = build_vector (vector_type, t);
1537 *adjustment_def = NULL_TREE;
1549 /* Function vect_create_epilog_for_reduction
1551 Create code at the loop-epilog to finalize the result of a reduction
1554 VECT_DEF is a vector of partial results.
1555 REDUC_CODE is the tree-code for the epilog reduction.
1556 STMT is the scalar reduction stmt that is being vectorized.
1557 REDUCTION_PHI is the phi-node that carries the reduction computation.
1560 1. Creates the reduction def-use cycle: sets the arguments for
1562 The loop-entry argument is the vectorized initial-value of the reduction.
1563 The loop-latch argument is VECT_DEF - the vector of partial sums.
1564 2. "Reduces" the vector of partial results VECT_DEF into a single result,
1565 by applying the operation specified by REDUC_CODE if available, or by
1566 other means (whole-vector shifts or a scalar loop).
1567 The function also creates a new phi node at the loop exit to preserve
1568 loop-closed form, as illustrated below.
1570 The flow at the entry to this function:
1573 vec_def = phi <null, null> # REDUCTION_PHI
1574 VECT_DEF = vector_stmt # vectorized form of STMT
1575 s_loop = scalar_stmt # (scalar) STMT
1577 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1581 The above is transformed by this function into:
1584 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
1585 VECT_DEF = vector_stmt # vectorized form of STMT
1586 s_loop = scalar_stmt # (scalar) STMT
1588 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
1589 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1590 v_out2 = reduce <v_out1>
1591 s_out3 = extract_field <v_out2, 0>
1592 s_out4 = adjust_result <s_out3>
1598 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
1599 enum tree_code reduc_code, tree reduction_phi)
1601 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1603 enum machine_mode mode;
1604 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1605 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1606 basic_block exit_bb;
1610 block_stmt_iterator exit_bsi;
1615 tree new_scalar_dest, exit_phi;
1616 tree bitsize, bitpos, bytesize;
1617 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
1618 tree scalar_initial_def;
1619 tree vec_initial_def;
1621 imm_use_iterator imm_iter;
1622 use_operand_p use_p;
1623 bool extract_scalar_result;
1627 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
1630 op_type = TREE_OPERAND_LENGTH (operation);
1631 reduction_op = TREE_OPERAND (operation, op_type-1);
1632 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
1633 mode = TYPE_MODE (vectype);
1635 /*** 1. Create the reduction def-use cycle ***/
1637 /* 1.1 set the loop-entry arg of the reduction-phi: */
1638 /* For the case of reduction, vect_get_vec_def_for_operand returns
1639 the scalar def before the loop, that defines the initial value
1640 of the reduction variable. */
1641 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
1642 &scalar_initial_def);
1643 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
1645 /* 1.2 set the loop-latch arg for the reduction-phi: */
1646 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
1648 if (vect_print_dump_info (REPORT_DETAILS))
1650 fprintf (vect_dump, "transform reduction: created def-use cycle:");
1651 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
1652 fprintf (vect_dump, "\n");
1653 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
1657 /*** 2. Create epilog code
1658 The reduction epilog code operates across the elements of the vector
1659 of partial results computed by the vectorized loop.
1660 The reduction epilog code consists of:
1661 step 1: compute the scalar result in a vector (v_out2)
1662 step 2: extract the scalar result (s_out3) from the vector (v_out2)
1663 step 3: adjust the scalar result (s_out3) if needed.
1665 Step 1 can be accomplished using one the following three schemes:
1666 (scheme 1) using reduc_code, if available.
1667 (scheme 2) using whole-vector shifts, if available.
1668 (scheme 3) using a scalar loop. In this case steps 1+2 above are
1671 The overall epilog code looks like this:
1673 s_out0 = phi <s_loop> # original EXIT_PHI
1674 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
1675 v_out2 = reduce <v_out1> # step 1
1676 s_out3 = extract_field <v_out2, 0> # step 2
1677 s_out4 = adjust_result <s_out3> # step 3
1679 (step 3 is optional, and step2 1 and 2 may be combined).
1680 Lastly, the uses of s_out0 are replaced by s_out4.
1684 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
1685 v_out1 = phi <v_loop> */
1687 exit_bb = single_exit (loop)->dest;
1688 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
1689 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
1690 exit_bsi = bsi_after_labels (exit_bb);
1692 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
1693 (i.e. when reduc_code is not available) and in the final adjustment
1694 code (if needed). Also get the original scalar reduction variable as
1695 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
1696 represents a reduction pattern), the tree-code and scalar-def are
1697 taken from the original stmt that the pattern-stmt (STMT) replaces.
1698 Otherwise (it is a regular reduction) - the tree-code and scalar-def
1699 are taken from STMT. */
1701 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1704 /* Regular reduction */
1709 /* Reduction pattern */
1710 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
1711 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1712 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
1714 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
1715 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
1716 scalar_type = TREE_TYPE (scalar_dest);
1717 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
1718 bitsize = TYPE_SIZE (scalar_type);
1719 bytesize = TYPE_SIZE_UNIT (scalar_type);
1721 /* 2.3 Create the reduction code, using one of the three schemes described
1724 if (reduc_code < NUM_TREE_CODES)
1728 /*** Case 1: Create:
1729 v_out2 = reduc_expr <v_out1> */
1731 if (vect_print_dump_info (REPORT_DETAILS))
1732 fprintf (vect_dump, "Reduce using direct vector reduction.");
1734 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1735 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
1736 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1737 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1738 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1739 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1741 extract_scalar_result = true;
1745 enum tree_code shift_code = 0;
1746 bool have_whole_vector_shift = true;
1748 int element_bitsize = tree_low_cst (bitsize, 1);
1749 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1752 if (vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
1753 shift_code = VEC_RSHIFT_EXPR;
1755 have_whole_vector_shift = false;
1757 /* Regardless of whether we have a whole vector shift, if we're
1758 emulating the operation via tree-vect-generic, we don't want
1759 to use it. Only the first round of the reduction is likely
1760 to still be profitable via emulation. */
1761 /* ??? It might be better to emit a reduction tree code here, so that
1762 tree-vect-generic can expand the first round via bit tricks. */
1763 if (!VECTOR_MODE_P (mode))
1764 have_whole_vector_shift = false;
1767 optab optab = optab_for_tree_code (code, vectype);
1768 if (optab->handlers[mode].insn_code == CODE_FOR_nothing)
1769 have_whole_vector_shift = false;
1772 if (have_whole_vector_shift)
1774 /*** Case 2: Create:
1775 for (offset = VS/2; offset >= element_size; offset/=2)
1777 Create: va' = vec_shift <va, offset>
1778 Create: va = vop <va, va'>
1781 if (vect_print_dump_info (REPORT_DETAILS))
1782 fprintf (vect_dump, "Reduce using vector shifts");
1784 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1785 new_temp = PHI_RESULT (new_phi);
1787 for (bit_offset = vec_size_in_bits/2;
1788 bit_offset >= element_bitsize;
1791 tree bitpos = size_int (bit_offset);
1792 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
1793 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1794 new_name = make_ssa_name (vec_dest, epilog_stmt);
1795 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1796 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1798 tmp = build2 (code, vectype, new_name, new_temp);
1799 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
1800 new_temp = make_ssa_name (vec_dest, epilog_stmt);
1801 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1802 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1805 extract_scalar_result = true;
1811 /*** Case 3: Create:
1812 s = extract_field <v_out2, 0>
1813 for (offset = element_size;
1814 offset < vector_size;
1815 offset += element_size;)
1817 Create: s' = extract_field <v_out2, offset>
1818 Create: s = op <s, s'>
1821 if (vect_print_dump_info (REPORT_DETAILS))
1822 fprintf (vect_dump, "Reduce using scalar code. ");
1824 vec_temp = PHI_RESULT (new_phi);
1825 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
1826 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1828 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1829 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1830 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1831 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1832 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1834 for (bit_offset = element_bitsize;
1835 bit_offset < vec_size_in_bits;
1836 bit_offset += element_bitsize)
1839 tree bitpos = bitsize_int (bit_offset);
1840 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
1843 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1844 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1845 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
1846 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
1847 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1849 tmp = build2 (code, scalar_type, new_name, new_temp);
1850 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1851 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1852 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1853 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1856 extract_scalar_result = false;
1860 /* 2.4 Extract the final scalar result. Create:
1861 s_out3 = extract_field <v_out2, bitpos> */
1863 if (extract_scalar_result)
1867 if (vect_print_dump_info (REPORT_DETAILS))
1868 fprintf (vect_dump, "extract scalar result");
1870 if (BYTES_BIG_ENDIAN)
1871 bitpos = size_binop (MULT_EXPR,
1872 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
1873 TYPE_SIZE (scalar_type));
1875 bitpos = bitsize_zero_node;
1877 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
1878 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
1879 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
1880 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1881 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1882 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1885 /* 2.4 Adjust the final result by the initial value of the reduction
1886 variable. (When such adjustment is not needed, then
1887 'scalar_initial_def' is zero).
1890 s_out4 = scalar_expr <s_out3, scalar_initial_def> */
1892 if (scalar_initial_def)
1894 tree tmp = build2 (code, scalar_type, new_temp, scalar_initial_def);
1895 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
1896 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
1897 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
1898 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
1901 /* 2.6 Replace uses of s_out0 with uses of s_out3 */
1903 /* Find the loop-closed-use at the loop exit of the original scalar result.
1904 (The reduction result is expected to have two immediate uses - one at the
1905 latch block, and one at the loop exit). */
1907 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
1909 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
1911 exit_phi = USE_STMT (use_p);
1915 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
1916 gcc_assert (exit_phi);
1917 /* Replace the uses: */
1918 orig_name = PHI_RESULT (exit_phi);
1919 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
1920 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1921 SET_USE (use_p, new_temp);
1925 /* Function vectorizable_reduction.
1927 Check if STMT performs a reduction operation that can be vectorized.
1928 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1929 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1930 Return FALSE if not a vectorizable STMT, TRUE otherwise.
1932 This function also handles reduction idioms (patterns) that have been
1933 recognized in advance during vect_pattern_recog. In this case, STMT may be
1935 X = pattern_expr (arg0, arg1, ..., X)
1936 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
1937 sequence that had been detected and replaced by the pattern-stmt (STMT).
1939 In some cases of reduction patterns, the type of the reduction variable X is
1940 different than the type of the other arguments of STMT.
1941 In such cases, the vectype that is used when transforming STMT into a vector
1942 stmt is different than the vectype that is used to determine the
1943 vectorization factor, because it consists of a different number of elements
1944 than the actual number of elements that are being operated upon in parallel.
1946 For example, consider an accumulation of shorts into an int accumulator.
1947 On some targets it's possible to vectorize this pattern operating on 8
1948 shorts at a time (hence, the vectype for purposes of determining the
1949 vectorization factor should be V8HI); on the other hand, the vectype that
1950 is used to create the vector form is actually V4SI (the type of the result).
1952 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
1953 indicates what is the actual level of parallelism (V8HI in the example), so
1954 that the right vectorization factor would be derived. This vectype
1955 corresponds to the type of arguments to the reduction stmt, and should *NOT*
1956 be used to create the vectorized stmt. The right vectype for the vectorized
1957 stmt is obtained from the type of the result X:
1958 get_vectype_for_scalar_type (TREE_TYPE (X))
1960 This means that, contrary to "regular" reductions (or "regular" stmts in
1961 general), the following equation:
1962 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
1963 does *NOT* necessarily hold for reduction patterns. */
1966 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1971 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
1972 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1973 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1974 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1975 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1977 enum tree_code code, orig_code, epilog_reduc_code = 0;
1978 enum machine_mode vec_mode;
1980 optab optab, reduc_optab;
1981 tree new_temp = NULL_TREE;
1983 enum vect_def_type dt;
1988 stmt_vec_info orig_stmt_info;
1989 tree expr = NULL_TREE;
1991 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1992 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
1993 stmt_vec_info prev_stmt_info;
1995 tree new_stmt = NULL_TREE;
1998 gcc_assert (ncopies >= 1);
2000 /* 1. Is vectorizable reduction? */
2002 /* Not supportable if the reduction variable is used in the loop. */
2003 if (STMT_VINFO_RELEVANT_P (stmt_info))
2006 if (!STMT_VINFO_LIVE_P (stmt_info))
2009 /* Make sure it was already recognized as a reduction computation. */
2010 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2013 /* 2. Has this been recognized as a reduction pattern?
2015 Check if STMT represents a pattern that has been recognized
2016 in earlier analysis stages. For stmts that represent a pattern,
2017 the STMT_VINFO_RELATED_STMT field records the last stmt in
2018 the original sequence that constitutes the pattern. */
2020 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2023 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2024 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2025 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2026 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2029 /* 3. Check the operands of the operation. The first operands are defined
2030 inside the loop body. The last operand is the reduction variable,
2031 which is defined by the loop-header-phi. */
2033 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2035 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2036 code = TREE_CODE (operation);
2037 op_type = TREE_OPERAND_LENGTH (operation);
2038 if (op_type != binary_op && op_type != ternary_op)
2040 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2041 scalar_type = TREE_TYPE (scalar_dest);
2043 /* All uses but the last are expected to be defined in the loop.
2044 The last use is the reduction variable. */
2045 for (i = 0; i < op_type-1; i++)
2047 op = TREE_OPERAND (operation, i);
2048 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2049 gcc_assert (is_simple_use);
2050 if (dt != vect_loop_def
2051 && dt != vect_invariant_def
2052 && dt != vect_constant_def
2053 && dt != vect_induction_def)
2057 op = TREE_OPERAND (operation, i);
2058 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2059 gcc_assert (is_simple_use);
2060 gcc_assert (dt == vect_reduction_def);
2061 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2063 gcc_assert (orig_stmt == vect_is_simple_reduction (loop, def_stmt));
2065 gcc_assert (stmt == vect_is_simple_reduction (loop, def_stmt));
2067 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2070 /* 4. Supportable by target? */
2072 /* 4.1. check support for the operation in the loop */
2073 optab = optab_for_tree_code (code, vectype);
2076 if (vect_print_dump_info (REPORT_DETAILS))
2077 fprintf (vect_dump, "no optab.");
2080 vec_mode = TYPE_MODE (vectype);
2081 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2083 if (vect_print_dump_info (REPORT_DETAILS))
2084 fprintf (vect_dump, "op not supported by target.");
2085 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2086 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2087 < vect_min_worthwhile_factor (code))
2089 if (vect_print_dump_info (REPORT_DETAILS))
2090 fprintf (vect_dump, "proceeding using word mode.");
2093 /* Worthwhile without SIMD support? */
2094 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2095 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2096 < vect_min_worthwhile_factor (code))
2098 if (vect_print_dump_info (REPORT_DETAILS))
2099 fprintf (vect_dump, "not worthwhile without SIMD support.");
2103 /* 4.2. Check support for the epilog operation.
2105 If STMT represents a reduction pattern, then the type of the
2106 reduction variable may be different than the type of the rest
2107 of the arguments. For example, consider the case of accumulation
2108 of shorts into an int accumulator; The original code:
2109 S1: int_a = (int) short_a;
2110 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2113 STMT: int_acc = widen_sum <short_a, int_acc>
2116 1. The tree-code that is used to create the vector operation in the
2117 epilog code (that reduces the partial results) is not the
2118 tree-code of STMT, but is rather the tree-code of the original
2119 stmt from the pattern that STMT is replacing. I.e, in the example
2120 above we want to use 'widen_sum' in the loop, but 'plus' in the
2122 2. The type (mode) we use to check available target support
2123 for the vector operation to be created in the *epilog*, is
2124 determined by the type of the reduction variable (in the example
2125 above we'd check this: plus_optab[vect_int_mode]).
2126 However the type (mode) we use to check available target support
2127 for the vector operation to be created *inside the loop*, is
2128 determined by the type of the other arguments to STMT (in the
2129 example we'd check this: widen_sum_optab[vect_short_mode]).
2131 This is contrary to "regular" reductions, in which the types of all
2132 the arguments are the same as the type of the reduction variable.
2133 For "regular" reductions we can therefore use the same vector type
2134 (and also the same tree-code) when generating the epilog code and
2135 when generating the code inside the loop. */
2139 /* This is a reduction pattern: get the vectype from the type of the
2140 reduction variable, and get the tree-code from orig_stmt. */
2141 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2142 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2143 vec_mode = TYPE_MODE (vectype);
2147 /* Regular reduction: use the same vectype and tree-code as used for
2148 the vector code inside the loop can be used for the epilog code. */
2152 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2154 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2157 if (vect_print_dump_info (REPORT_DETAILS))
2158 fprintf (vect_dump, "no optab for reduction.");
2159 epilog_reduc_code = NUM_TREE_CODES;
2161 if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2163 if (vect_print_dump_info (REPORT_DETAILS))
2164 fprintf (vect_dump, "reduc op not supported by target.");
2165 epilog_reduc_code = NUM_TREE_CODES;
2168 if (!vec_stmt) /* transformation not required. */
2170 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2171 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2177 if (vect_print_dump_info (REPORT_DETAILS))
2178 fprintf (vect_dump, "transform reduction.");
2180 /* Create the destination vector */
2181 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2183 /* Create the reduction-phi that defines the reduction-operand. */
2184 new_phi = create_phi_node (vec_dest, loop->header);
2186 /* In case the vectorization factor (VF) is bigger than the number
2187 of elements that we can fit in a vectype (nunits), we have to generate
2188 more than one vector stmt - i.e - we need to "unroll" the
2189 vector stmt by a factor VF/nunits. For more details see documentation
2190 in vectorizable_operation. */
2192 prev_stmt_info = NULL;
2193 for (j = 0; j < ncopies; j++)
2198 op = TREE_OPERAND (operation, 0);
2199 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2200 if (op_type == ternary_op)
2202 op = TREE_OPERAND (operation, 1);
2203 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2206 /* Get the vector def for the reduction variable from the phi node */
2207 reduc_def = PHI_RESULT (new_phi);
2211 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2212 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2213 if (op_type == ternary_op)
2214 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2216 /* Get the vector def for the reduction variable from the vectorized
2217 reduction operation generated in the previous iteration (j-1) */
2218 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2221 /* Arguments are ready. create the new vector stmt. */
2222 if (op_type == binary_op)
2223 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2225 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2227 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2228 new_temp = make_ssa_name (vec_dest, new_stmt);
2229 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2230 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2233 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2235 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2236 prev_stmt_info = vinfo_for_stmt (new_stmt);
2239 /* Finalize the reduction-phi (set it's arguments) and create the
2240 epilog reduction code. */
2241 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2245 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2246 a function declaration if the target has a vectorized version
2247 of the function, or NULL_TREE if the function cannot be vectorized. */
2250 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2252 tree fndecl = get_callee_fndecl (call);
2253 enum built_in_function code;
2255 /* We only handle functions that do not read or clobber memory -- i.e.
2256 const or novops ones. */
2257 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2261 || TREE_CODE (fndecl) != FUNCTION_DECL
2262 || !DECL_BUILT_IN (fndecl))
2265 code = DECL_FUNCTION_CODE (fndecl);
2266 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2270 /* Function vectorizable_call.
2272 Check if STMT performs a function call that can be vectorized.
2273 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2274 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2275 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2278 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2284 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2286 tree vectype_out, vectype_in;
2289 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2290 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2291 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2293 int ncopies, j, nargs;
2294 call_expr_arg_iterator iter;
2296 enum { NARROW, NONE, WIDEN } modifier;
2298 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2301 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2304 /* FORNOW: not yet supported. */
2305 if (STMT_VINFO_LIVE_P (stmt_info))
2307 if (vect_print_dump_info (REPORT_DETAILS))
2308 fprintf (vect_dump, "value used after loop.");
2312 /* Is STMT a vectorizable call? */
2313 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2316 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2319 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2320 if (TREE_CODE (operation) != CALL_EXPR)
2323 /* Process function arguments. */
2324 rhs_type = NULL_TREE;
2326 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2328 /* Bail out if the function has more than two arguments, we
2329 do not have interesting builtin functions to vectorize with
2330 more than two arguments. */
2334 /* We can only handle calls with arguments of the same type. */
2336 && rhs_type != TREE_TYPE (op))
2338 if (vect_print_dump_info (REPORT_DETAILS))
2339 fprintf (vect_dump, "argument types differ.");
2342 rhs_type = TREE_TYPE (op);
2344 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
2346 if (vect_print_dump_info (REPORT_DETAILS))
2347 fprintf (vect_dump, "use not simple.");
2354 /* No arguments is also not good. */
2358 vectype_in = get_vectype_for_scalar_type (rhs_type);
2359 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2361 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
2362 vectype_out = get_vectype_for_scalar_type (lhs_type);
2363 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2366 if (nunits_in == nunits_out / 2)
2368 else if (nunits_out == nunits_in)
2370 else if (nunits_out == nunits_in / 2)
2375 /* For now, we only vectorize functions if a target specific builtin
2376 is available. TODO -- in some cases, it might be profitable to
2377 insert the calls for pieces of the vector, in order to be able
2378 to vectorize other operations in the loop. */
2379 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
2380 if (fndecl == NULL_TREE)
2382 if (vect_print_dump_info (REPORT_DETAILS))
2383 fprintf (vect_dump, "function is not vectorizable.");
2388 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
2390 if (modifier == NARROW)
2391 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2393 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2395 /* Sanity check: make sure that at least one copy of the vectorized stmt
2396 needs to be generated. */
2397 gcc_assert (ncopies >= 1);
2399 if (!vec_stmt) /* transformation not required. */
2401 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
2402 if (vect_print_dump_info (REPORT_DETAILS))
2403 fprintf (vect_dump, "=== vectorizable_call ===");
2404 vect_model_simple_cost (stmt_info, ncopies, dt);
2410 if (vect_print_dump_info (REPORT_DETAILS))
2411 fprintf (vect_dump, "transform operation.");
2414 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2415 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2417 prev_stmt_info = NULL;
2421 for (j = 0; j < ncopies; ++j)
2423 /* Build argument list for the vectorized call. */
2424 /* FIXME: Rewrite this so that it doesn't
2425 construct a temporary list. */
2428 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2432 = vect_get_vec_def_for_operand (op, stmt, NULL);
2435 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2437 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2441 vargs = nreverse (vargs);
2443 rhs = build_function_call_expr (fndecl, vargs);
2444 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2445 new_temp = make_ssa_name (vec_dest, new_stmt);
2446 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2448 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2451 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2453 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2455 prev_stmt_info = vinfo_for_stmt (new_stmt);
2461 for (j = 0; j < ncopies; ++j)
2463 /* Build argument list for the vectorized call. */
2464 /* FIXME: Rewrite this so that it doesn't
2465 construct a temporary list. */
2468 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2473 = vect_get_vec_def_for_operand (op, stmt, NULL);
2475 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2480 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
2482 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
2485 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
2486 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
2490 vargs = nreverse (vargs);
2492 rhs = build_function_call_expr (fndecl, vargs);
2493 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
2494 new_temp = make_ssa_name (vec_dest, new_stmt);
2495 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2497 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2500 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2502 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2504 prev_stmt_info = vinfo_for_stmt (new_stmt);
2507 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2512 /* No current target implements this case. */
2516 /* The call in STMT might prevent it from being removed in dce.
2517 We however cannot remove it here, due to the way the ssa name
2518 it defines is mapped to the new definition. So just replace
2519 rhs of the statement with something harmless. */
2520 type = TREE_TYPE (scalar_dest);
2521 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
2528 /* Function vect_gen_widened_results_half
2530 Create a vector stmt whose code, type, number of arguments, and result
2531 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
2532 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
2533 In the case that CODE is a CALL_EXPR, this means that a call to DECL
2534 needs to be created (DECL is a function-decl of a target-builtin).
2535 STMT is the original scalar stmt that we are vectorizing. */
2538 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
2539 tree vec_oprnd0, tree vec_oprnd1, int op_type,
2540 tree vec_dest, block_stmt_iterator *bsi,
2549 /* Generate half of the widened result: */
2550 if (code == CALL_EXPR)
2552 /* Target specific support */
2553 if (op_type == binary_op)
2554 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
2556 expr = build_call_expr (decl, 1, vec_oprnd0);
2560 /* Generic support */
2561 gcc_assert (op_type == TREE_CODE_LENGTH (code));
2562 if (op_type == binary_op)
2563 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
2565 expr = build1 (code, vectype, vec_oprnd0);
2567 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2568 new_temp = make_ssa_name (vec_dest, new_stmt);
2569 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2570 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2572 if (code == CALL_EXPR)
2574 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2576 if (TREE_CODE (sym) == SSA_NAME)
2577 sym = SSA_NAME_VAR (sym);
2578 mark_sym_for_renaming (sym);
2586 /* Function vectorizable_conversion.
2588 Check if STMT performs a conversion operation, that can be vectorized.
2589 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2590 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2591 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2594 vectorizable_conversion (tree stmt, block_stmt_iterator * bsi,
2601 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2602 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2603 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2604 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
2605 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
2608 enum vect_def_type dt0;
2610 stmt_vec_info prev_stmt_info;
2613 tree vectype_out, vectype_in;
2616 tree rhs_type, lhs_type;
2618 enum { NARROW, NONE, WIDEN } modifier;
2620 /* Is STMT a vectorizable conversion? */
2622 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2625 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2628 if (STMT_VINFO_LIVE_P (stmt_info))
2630 /* FORNOW: not yet supported. */
2631 if (vect_print_dump_info (REPORT_DETAILS))
2632 fprintf (vect_dump, "value used after loop.");
2636 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2639 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2642 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2643 code = TREE_CODE (operation);
2644 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
2647 /* Check types of lhs and rhs */
2648 op0 = TREE_OPERAND (operation, 0);
2649 rhs_type = TREE_TYPE (op0);
2650 vectype_in = get_vectype_for_scalar_type (rhs_type);
2651 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
2653 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2654 lhs_type = TREE_TYPE (scalar_dest);
2655 vectype_out = get_vectype_for_scalar_type (lhs_type);
2656 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
2659 if (nunits_in == nunits_out / 2)
2661 else if (nunits_out == nunits_in)
2663 else if (nunits_out == nunits_in / 2)
2668 if (modifier == NONE)
2669 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
2671 /* Bail out if the types are both integral or non-integral */
2672 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
2673 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
2676 if (modifier == NARROW)
2677 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
2679 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
2681 /* Sanity check: make sure that at least one copy of the vectorized stmt
2682 needs to be generated. */
2683 gcc_assert (ncopies >= 1);
2685 /* Check the operands of the operation. */
2686 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt0))
2688 if (vect_print_dump_info (REPORT_DETAILS))
2689 fprintf (vect_dump, "use not simple.");
2693 /* Supportable by target? */
2694 if ((modifier == NONE
2695 && !targetm.vectorize.builtin_conversion (code, vectype_in))
2696 || (modifier == WIDEN
2697 && !supportable_widening_operation (code, stmt, vectype_in,
2700 || (modifier == NARROW
2701 && !supportable_narrowing_operation (code, stmt, vectype_in,
2704 if (vect_print_dump_info (REPORT_DETAILS))
2705 fprintf (vect_dump, "op not supported by target.");
2709 if (modifier != NONE)
2710 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2712 if (!vec_stmt) /* transformation not required. */
2714 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
2719 if (vect_print_dump_info (REPORT_DETAILS))
2720 fprintf (vect_dump, "transform conversion.");
2723 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
2725 prev_stmt_info = NULL;
2729 for (j = 0; j < ncopies; j++)
2735 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2737 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2740 targetm.vectorize.builtin_conversion (code, vectype_in);
2741 new_stmt = build_call_expr (builtin_decl, 1, vec_oprnd0);
2743 /* Arguments are ready. create the new vector stmt. */
2744 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
2745 new_temp = make_ssa_name (vec_dest, new_stmt);
2746 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2747 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2748 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
2750 if (TREE_CODE (sym) == SSA_NAME)
2751 sym = SSA_NAME_VAR (sym);
2752 mark_sym_for_renaming (sym);
2756 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2758 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2759 prev_stmt_info = vinfo_for_stmt (new_stmt);
2764 /* In case the vectorization factor (VF) is bigger than the number
2765 of elements that we can fit in a vectype (nunits), we have to
2766 generate more than one vector stmt - i.e - we need to "unroll"
2767 the vector stmt by a factor VF/nunits. */
2768 for (j = 0; j < ncopies; j++)
2771 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2773 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2775 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
2777 /* Generate first half of the widened result: */
2779 = vect_gen_widened_results_half (code1, vectype_out, decl1,
2780 vec_oprnd0, vec_oprnd1,
2781 unary_op, vec_dest, bsi, stmt);
2783 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2785 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2786 prev_stmt_info = vinfo_for_stmt (new_stmt);
2788 /* Generate second half of the widened result: */
2790 = vect_gen_widened_results_half (code2, vectype_out, decl2,
2791 vec_oprnd0, vec_oprnd1,
2792 unary_op, vec_dest, bsi, stmt);
2793 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2794 prev_stmt_info = vinfo_for_stmt (new_stmt);
2799 /* In case the vectorization factor (VF) is bigger than the number
2800 of elements that we can fit in a vectype (nunits), we have to
2801 generate more than one vector stmt - i.e - we need to "unroll"
2802 the vector stmt by a factor VF/nunits. */
2803 for (j = 0; j < ncopies; j++)
2808 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
2809 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2813 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd1);
2814 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt0, vec_oprnd0);
2817 /* Arguments are ready. Create the new vector stmt. */
2818 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
2819 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2820 new_temp = make_ssa_name (vec_dest, new_stmt);
2821 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2822 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2825 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
2827 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2829 prev_stmt_info = vinfo_for_stmt (new_stmt);
2832 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
2838 /* Function vectorizable_assignment.
2840 Check if STMT performs an assignment (copy) that can be vectorized.
2841 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2842 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2843 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2846 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2852 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2853 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2854 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2857 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2858 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2859 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2861 gcc_assert (ncopies >= 1);
2863 return false; /* FORNOW */
2865 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2868 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2871 /* FORNOW: not yet supported. */
2872 if (STMT_VINFO_LIVE_P (stmt_info))
2874 if (vect_print_dump_info (REPORT_DETAILS))
2875 fprintf (vect_dump, "value used after loop.");
2879 /* Is vectorizable assignment? */
2880 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2883 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2884 if (TREE_CODE (scalar_dest) != SSA_NAME)
2887 op = GIMPLE_STMT_OPERAND (stmt, 1);
2888 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
2890 if (vect_print_dump_info (REPORT_DETAILS))
2891 fprintf (vect_dump, "use not simple.");
2895 if (!vec_stmt) /* transformation not required. */
2897 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2898 if (vect_print_dump_info (REPORT_DETAILS))
2899 fprintf (vect_dump, "=== vectorizable_assignment ===");
2900 vect_model_simple_cost (stmt_info, ncopies, dt);
2905 if (vect_print_dump_info (REPORT_DETAILS))
2906 fprintf (vect_dump, "transform assignment.");
2909 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2912 op = GIMPLE_STMT_OPERAND (stmt, 1);
2913 vec_oprnd = vect_get_vec_def_for_operand (op, stmt, NULL);
2915 /* Arguments are ready. create the new vector stmt. */
2916 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_oprnd);
2917 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2918 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
2919 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2925 /* Function vect_min_worthwhile_factor.
2927 For a loop where we could vectorize the operation indicated by CODE,
2928 return the minimum vectorization factor that makes it worthwhile
2929 to use generic vectors. */
2931 vect_min_worthwhile_factor (enum tree_code code)
2952 /* Function vectorizable_induction
2954 Check if PHI performs an induction computation that can be vectorized.
2955 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
2956 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
2957 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2960 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
2963 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
2964 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2965 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2966 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2967 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2970 gcc_assert (ncopies >= 1);
2972 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2975 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
2977 if (STMT_VINFO_LIVE_P (stmt_info))
2979 /* FORNOW: not yet supported. */
2980 if (vect_print_dump_info (REPORT_DETAILS))
2981 fprintf (vect_dump, "value used after loop.");
2985 if (TREE_CODE (phi) != PHI_NODE)
2988 if (!vec_stmt) /* transformation not required. */
2990 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
2991 if (vect_print_dump_info (REPORT_DETAILS))
2992 fprintf (vect_dump, "=== vectorizable_induction ===");
2993 vect_model_induction_cost (stmt_info, ncopies);
2999 if (vect_print_dump_info (REPORT_DETAILS))
3000 fprintf (vect_dump, "transform induction phi.");
3002 vec_def = get_initial_def_for_induction (phi);
3003 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3008 /* Function vectorizable_operation.
3010 Check if STMT performs a binary or unary operation that can be vectorized.
3011 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3012 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3013 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3016 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3021 tree op0, op1 = NULL;
3022 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3023 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3024 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3025 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3026 enum tree_code code;
3027 enum machine_mode vec_mode;
3032 enum machine_mode optab_op2_mode;
3034 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3036 stmt_vec_info prev_stmt_info;
3037 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3040 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3043 gcc_assert (ncopies >= 1);
3045 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3048 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3051 /* FORNOW: not yet supported. */
3052 if (STMT_VINFO_LIVE_P (stmt_info))
3054 if (vect_print_dump_info (REPORT_DETAILS))
3055 fprintf (vect_dump, "value used after loop.");
3059 /* Is STMT a vectorizable binary/unary operation? */
3060 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3063 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3066 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3067 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3068 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3069 if (nunits_out != nunits_in)
3072 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3073 code = TREE_CODE (operation);
3075 /* For pointer addition, we should use the normal plus for
3076 the vector addition. */
3077 if (code == POINTER_PLUS_EXPR)
3080 optab = optab_for_tree_code (code, vectype);
3082 /* Support only unary or binary operations. */
3083 op_type = TREE_OPERAND_LENGTH (operation);
3084 if (op_type != unary_op && op_type != binary_op)
3086 if (vect_print_dump_info (REPORT_DETAILS))
3087 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3091 op0 = TREE_OPERAND (operation, 0);
3092 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3094 if (vect_print_dump_info (REPORT_DETAILS))
3095 fprintf (vect_dump, "use not simple.");
3099 if (op_type == binary_op)
3101 op1 = TREE_OPERAND (operation, 1);
3102 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3104 if (vect_print_dump_info (REPORT_DETAILS))
3105 fprintf (vect_dump, "use not simple.");
3110 /* Supportable by target? */
3113 if (vect_print_dump_info (REPORT_DETAILS))
3114 fprintf (vect_dump, "no optab.");
3117 vec_mode = TYPE_MODE (vectype);
3118 icode = (int) optab->handlers[(int) vec_mode].insn_code;
3119 if (icode == CODE_FOR_nothing)
3121 if (vect_print_dump_info (REPORT_DETAILS))
3122 fprintf (vect_dump, "op not supported by target.");
3123 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3124 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3125 < vect_min_worthwhile_factor (code))
3127 if (vect_print_dump_info (REPORT_DETAILS))
3128 fprintf (vect_dump, "proceeding using word mode.");
3131 /* Worthwhile without SIMD support? */
3132 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3133 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3134 < vect_min_worthwhile_factor (code))
3136 if (vect_print_dump_info (REPORT_DETAILS))
3137 fprintf (vect_dump, "not worthwhile without SIMD support.");
3141 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3143 /* FORNOW: not yet supported. */
3144 if (!VECTOR_MODE_P (vec_mode))
3147 /* Invariant argument is needed for a vector shift
3148 by a scalar shift operand. */
3149 optab_op2_mode = insn_data[icode].operand[2].mode;
3150 if (! (VECTOR_MODE_P (optab_op2_mode)
3151 || dt[1] == vect_constant_def
3152 || dt[1] == vect_invariant_def))
3154 if (vect_print_dump_info (REPORT_DETAILS))
3155 fprintf (vect_dump, "operand mode requires invariant argument.");
3160 if (!vec_stmt) /* transformation not required. */
3162 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3163 if (vect_print_dump_info (REPORT_DETAILS))
3164 fprintf (vect_dump, "=== vectorizable_operation ===");
3165 vect_model_simple_cost (stmt_info, ncopies, dt);
3171 if (vect_print_dump_info (REPORT_DETAILS))
3172 fprintf (vect_dump, "transform binary/unary operation.");
3175 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3177 /* In case the vectorization factor (VF) is bigger than the number
3178 of elements that we can fit in a vectype (nunits), we have to generate
3179 more than one vector stmt - i.e - we need to "unroll" the
3180 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3181 from one copy of the vector stmt to the next, in the field
3182 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3183 stages to find the correct vector defs to be used when vectorizing
3184 stmts that use the defs of the current stmt. The example below illustrates
3185 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3186 4 vectorized stmts):
3188 before vectorization:
3189 RELATED_STMT VEC_STMT
3193 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3195 RELATED_STMT VEC_STMT
3196 VS1_0: vx0 = memref0 VS1_1 -
3197 VS1_1: vx1 = memref1 VS1_2 -
3198 VS1_2: vx2 = memref2 VS1_3 -
3199 VS1_3: vx3 = memref3 - -
3200 S1: x = load - VS1_0
3203 step2: vectorize stmt S2 (done here):
3204 To vectorize stmt S2 we first need to find the relevant vector
3205 def for the first operand 'x'. This is, as usual, obtained from
3206 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3207 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3208 relevant vector def 'vx0'. Having found 'vx0' we can generate
3209 the vector stmt VS2_0, and as usual, record it in the
3210 STMT_VINFO_VEC_STMT of stmt S2.
3211 When creating the second copy (VS2_1), we obtain the relevant vector
3212 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3213 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3214 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3215 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3216 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3217 chain of stmts and pointers:
3218 RELATED_STMT VEC_STMT
3219 VS1_0: vx0 = memref0 VS1_1 -
3220 VS1_1: vx1 = memref1 VS1_2 -
3221 VS1_2: vx2 = memref2 VS1_3 -
3222 VS1_3: vx3 = memref3 - -
3223 S1: x = load - VS1_0
3224 VS2_0: vz0 = vx0 + v1 VS2_1 -
3225 VS2_1: vz1 = vx1 + v1 VS2_2 -
3226 VS2_2: vz2 = vx2 + v1 VS2_3 -
3227 VS2_3: vz3 = vx3 + v1 - -
3228 S2: z = x + 1 - VS2_0 */
3230 prev_stmt_info = NULL;
3231 for (j = 0; j < ncopies; j++)
3236 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3237 if (op_type == binary_op)
3239 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3241 /* Vector shl and shr insn patterns can be defined with
3242 scalar operand 2 (shift operand). In this case, use
3243 constant or loop invariant op1 directly, without
3244 extending it to vector mode first. */
3245 optab_op2_mode = insn_data[icode].operand[2].mode;
3246 if (!VECTOR_MODE_P (optab_op2_mode))
3248 if (vect_print_dump_info (REPORT_DETAILS))
3249 fprintf (vect_dump, "operand 1 using scalar mode.");
3254 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3259 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3260 if (op_type == binary_op)
3261 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3264 /* Arguments are ready. create the new vector stmt. */
3266 if (op_type == binary_op)
3267 new_stmt = build_gimple_modify_stmt (vec_dest,
3268 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
3270 new_stmt = build_gimple_modify_stmt (vec_dest,
3271 build1 (code, vectype, vec_oprnd0));
3272 new_temp = make_ssa_name (vec_dest, new_stmt);
3273 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3274 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3277 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3279 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3280 prev_stmt_info = vinfo_for_stmt (new_stmt);
3287 /* Function vectorizable_type_demotion
3289 Check if STMT performs a binary or unary operation that involves
3290 type demotion, and if it can be vectorized.
3291 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3292 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3293 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3296 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
3303 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3304 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3305 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3306 enum tree_code code, code1 = ERROR_MARK;
3309 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3311 stmt_vec_info prev_stmt_info;
3320 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3323 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3326 /* FORNOW: not yet supported. */
3327 if (STMT_VINFO_LIVE_P (stmt_info))
3329 if (vect_print_dump_info (REPORT_DETAILS))
3330 fprintf (vect_dump, "value used after loop.");
3334 /* Is STMT a vectorizable type-demotion operation? */
3335 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3338 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3341 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3342 code = TREE_CODE (operation);
3343 if (code != NOP_EXPR && code != CONVERT_EXPR)
3346 op0 = TREE_OPERAND (operation, 0);
3347 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3348 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3350 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3351 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3352 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3353 if (nunits_in != nunits_out / 2) /* FORNOW */
3356 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3357 gcc_assert (ncopies >= 1);
3359 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3360 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3361 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3362 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3363 && (code == NOP_EXPR || code == CONVERT_EXPR))))
3366 /* Check the operands of the operation. */
3367 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3369 if (vect_print_dump_info (REPORT_DETAILS))
3370 fprintf (vect_dump, "use not simple.");
3374 /* Supportable by target? */
3375 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
3378 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3380 if (!vec_stmt) /* transformation not required. */
3382 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
3383 if (vect_print_dump_info (REPORT_DETAILS))
3384 fprintf (vect_dump, "=== vectorizable_demotion ===");
3385 vect_model_simple_cost (stmt_info, ncopies, dt);
3390 if (vect_print_dump_info (REPORT_DETAILS))
3391 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
3395 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3397 /* In case the vectorization factor (VF) is bigger than the number
3398 of elements that we can fit in a vectype (nunits), we have to generate
3399 more than one vector stmt - i.e - we need to "unroll" the
3400 vector stmt by a factor VF/nunits. */
3401 prev_stmt_info = NULL;
3402 for (j = 0; j < ncopies; j++)
3407 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3408 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3412 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3413 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3416 /* Arguments are ready. Create the new vector stmt. */
3417 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3418 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3419 new_temp = make_ssa_name (vec_dest, new_stmt);
3420 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3421 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3424 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3426 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3428 prev_stmt_info = vinfo_for_stmt (new_stmt);
3431 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3436 /* Function vectorizable_type_promotion
3438 Check if STMT performs a binary or unary operation that involves
3439 type promotion, and if it can be vectorized.
3440 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3441 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3442 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3445 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
3451 tree op0, op1 = NULL;
3452 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
3453 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3454 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3455 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3456 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3459 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3461 stmt_vec_info prev_stmt_info;
3469 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3472 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3475 /* FORNOW: not yet supported. */
3476 if (STMT_VINFO_LIVE_P (stmt_info))
3478 if (vect_print_dump_info (REPORT_DETAILS))
3479 fprintf (vect_dump, "value used after loop.");
3483 /* Is STMT a vectorizable type-promotion operation? */
3484 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3487 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3490 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3491 code = TREE_CODE (operation);
3492 if (code != NOP_EXPR && code != CONVERT_EXPR
3493 && code != WIDEN_MULT_EXPR)
3496 op0 = TREE_OPERAND (operation, 0);
3497 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
3498 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3500 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3501 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3502 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3503 if (nunits_out != nunits_in / 2) /* FORNOW */
3506 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3507 gcc_assert (ncopies >= 1);
3509 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
3510 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3511 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
3512 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
3513 && (code == CONVERT_EXPR || code == NOP_EXPR))))
3516 /* Check the operands of the operation. */
3517 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3519 if (vect_print_dump_info (REPORT_DETAILS))
3520 fprintf (vect_dump, "use not simple.");
3524 op_type = TREE_CODE_LENGTH (code);
3525 if (op_type == binary_op)
3527 op1 = TREE_OPERAND (operation, 1);
3528 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3530 if (vect_print_dump_info (REPORT_DETAILS))
3531 fprintf (vect_dump, "use not simple.");
3536 /* Supportable by target? */
3537 if (!supportable_widening_operation (code, stmt, vectype_in,
3538 &decl1, &decl2, &code1, &code2))
3541 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3543 if (!vec_stmt) /* transformation not required. */
3545 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
3546 if (vect_print_dump_info (REPORT_DETAILS))
3547 fprintf (vect_dump, "=== vectorizable_promotion ===");
3548 vect_model_simple_cost (stmt_info, 2*ncopies, dt);
3554 if (vect_print_dump_info (REPORT_DETAILS))
3555 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
3559 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3561 /* In case the vectorization factor (VF) is bigger than the number
3562 of elements that we can fit in a vectype (nunits), we have to generate
3563 more than one vector stmt - i.e - we need to "unroll" the
3564 vector stmt by a factor VF/nunits. */
3566 prev_stmt_info = NULL;
3567 for (j = 0; j < ncopies; j++)
3572 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3573 if (op_type == binary_op)
3574 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
3578 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3579 if (op_type == binary_op)
3580 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
3583 /* Arguments are ready. Create the new vector stmt. We are creating
3584 two vector defs because the widened result does not fit in one vector.
3585 The vectorized stmt can be expressed as a call to a taregt builtin,
3586 or a using a tree-code. */
3587 /* Generate first half of the widened result: */
3588 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
3589 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3591 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3593 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3594 prev_stmt_info = vinfo_for_stmt (new_stmt);
3596 /* Generate second half of the widened result: */
3597 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
3598 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
3599 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3600 prev_stmt_info = vinfo_for_stmt (new_stmt);
3604 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3609 /* Function vect_strided_store_supported.
3611 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
3612 and FALSE otherwise. */
3615 vect_strided_store_supported (tree vectype)
3617 optab interleave_high_optab, interleave_low_optab;
3620 mode = (int) TYPE_MODE (vectype);
3622 /* Check that the operation is supported. */
3623 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
3625 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
3627 if (!interleave_high_optab || !interleave_low_optab)
3629 if (vect_print_dump_info (REPORT_DETAILS))
3630 fprintf (vect_dump, "no optab for interleave.");
3634 if (interleave_high_optab->handlers[(int) mode].insn_code
3636 || interleave_low_optab->handlers[(int) mode].insn_code
3637 == CODE_FOR_nothing)
3639 if (vect_print_dump_info (REPORT_DETAILS))
3640 fprintf (vect_dump, "interleave op not supported by target.");
3647 /* Function vect_permute_store_chain.
3649 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
3650 a power of 2, generate interleave_high/low stmts to reorder the data
3651 correctly for the stores. Return the final references for stores in
3654 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3655 The input is 4 vectors each containing 8 elements. We assign a number to each
3656 element, the input sequence is:
3658 1st vec: 0 1 2 3 4 5 6 7
3659 2nd vec: 8 9 10 11 12 13 14 15
3660 3rd vec: 16 17 18 19 20 21 22 23
3661 4th vec: 24 25 26 27 28 29 30 31
3663 The output sequence should be:
3665 1st vec: 0 8 16 24 1 9 17 25
3666 2nd vec: 2 10 18 26 3 11 19 27
3667 3rd vec: 4 12 20 28 5 13 21 30
3668 4th vec: 6 14 22 30 7 15 23 31
3670 i.e., we interleave the contents of the four vectors in their order.
3672 We use interleave_high/low instructions to create such output. The input of
3673 each interleave_high/low operation is two vectors:
3676 the even elements of the result vector are obtained left-to-right from the
3677 high/low elements of the first vector. The odd elements of the result are
3678 obtained left-to-right from the high/low elements of the second vector.
3679 The output of interleave_high will be: 0 4 1 5
3680 and of interleave_low: 2 6 3 7
3683 The permutation is done in log LENGTH stages. In each stage interleave_high
3684 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
3685 where the first argument is taken from the first half of DR_CHAIN and the
3686 second argument from it's second half.
3689 I1: interleave_high (1st vec, 3rd vec)
3690 I2: interleave_low (1st vec, 3rd vec)
3691 I3: interleave_high (2nd vec, 4th vec)
3692 I4: interleave_low (2nd vec, 4th vec)
3694 The output for the first stage is:
3696 I1: 0 16 1 17 2 18 3 19
3697 I2: 4 20 5 21 6 22 7 23
3698 I3: 8 24 9 25 10 26 11 27
3699 I4: 12 28 13 29 14 30 15 31
3701 The output of the second stage, i.e. the final result is:
3703 I1: 0 8 16 24 1 9 17 25
3704 I2: 2 10 18 26 3 11 19 27
3705 I3: 4 12 20 28 5 13 21 30
3706 I4: 6 14 22 30 7 15 23 31. */
3709 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
3710 unsigned int length,
3712 block_stmt_iterator *bsi,
3713 VEC(tree,heap) **result_chain)
3715 tree perm_dest, perm_stmt, vect1, vect2, high, low;
3716 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3717 tree scalar_dest, tmp;
3720 VEC(tree,heap) *first, *second;
3722 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3723 first = VEC_alloc (tree, heap, length/2);
3724 second = VEC_alloc (tree, heap, length/2);
3726 /* Check that the operation is supported. */
3727 if (!vect_strided_store_supported (vectype))
3730 *result_chain = VEC_copy (tree, heap, dr_chain);
3732 for (i = 0; i < exact_log2 (length); i++)
3734 for (j = 0; j < length/2; j++)
3736 vect1 = VEC_index (tree, dr_chain, j);
3737 vect2 = VEC_index (tree, dr_chain, j+length/2);
3739 /* Create interleaving stmt:
3740 in the case of big endian:
3741 high = interleave_high (vect1, vect2)
3742 and in the case of little endian:
3743 high = interleave_low (vect1, vect2). */
3744 perm_dest = create_tmp_var (vectype, "vect_inter_high");
3745 DECL_GIMPLE_REG_P (perm_dest) = 1;
3746 add_referenced_var (perm_dest);
3747 if (BYTES_BIG_ENDIAN)
3748 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3750 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3751 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3752 high = make_ssa_name (perm_dest, perm_stmt);
3753 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
3754 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3755 VEC_replace (tree, *result_chain, 2*j, high);
3757 /* Create interleaving stmt:
3758 in the case of big endian:
3759 low = interleave_low (vect1, vect2)
3760 and in the case of little endian:
3761 low = interleave_high (vect1, vect2). */
3762 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3763 DECL_GIMPLE_REG_P (perm_dest) = 1;
3764 add_referenced_var (perm_dest);
3765 if (BYTES_BIG_ENDIAN)
3766 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
3768 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
3769 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
3770 low = make_ssa_name (perm_dest, perm_stmt);
3771 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
3772 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
3773 VEC_replace (tree, *result_chain, 2*j+1, low);
3775 dr_chain = VEC_copy (tree, heap, *result_chain);
3781 /* Function vectorizable_store.
3783 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
3785 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3786 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3787 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3790 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
3795 tree vec_oprnd = NULL_TREE;
3796 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3797 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
3798 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3799 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3800 enum machine_mode vec_mode;
3802 enum dr_alignment_support alignment_support_cheme;
3804 def_operand_p def_p;
3806 enum vect_def_type dt;
3807 stmt_vec_info prev_stmt_info = NULL;
3808 tree dataref_ptr = NULL_TREE;
3809 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3810 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3812 tree next_stmt, first_stmt;
3813 bool strided_store = false;
3814 unsigned int group_size, i;
3815 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
3816 gcc_assert (ncopies >= 1);
3818 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3821 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3824 if (STMT_VINFO_LIVE_P (stmt_info))
3826 if (vect_print_dump_info (REPORT_DETAILS))
3827 fprintf (vect_dump, "value used after loop.");
3831 /* Is vectorizable store? */
3833 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3836 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3837 if (TREE_CODE (scalar_dest) != ARRAY_REF
3838 && TREE_CODE (scalar_dest) != INDIRECT_REF
3839 && !DR_GROUP_FIRST_DR (stmt_info))
3842 op = GIMPLE_STMT_OPERAND (stmt, 1);
3843 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
3845 if (vect_print_dump_info (REPORT_DETAILS))
3846 fprintf (vect_dump, "use not simple.");
3850 vec_mode = TYPE_MODE (vectype);
3851 /* FORNOW. In some cases can vectorize even if data-type not supported
3852 (e.g. - array initialization with 0). */
3853 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
3856 if (!STMT_VINFO_DATA_REF (stmt_info))
3859 if (DR_GROUP_FIRST_DR (stmt_info))
3861 strided_store = true;
3862 if (!vect_strided_store_supported (vectype))
3866 if (!vec_stmt) /* transformation not required. */
3868 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
3869 vect_model_store_cost (stmt_info, ncopies, dt);
3877 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3878 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
3879 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
3881 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
3883 /* We vectorize all the stmts of the interleaving group when we
3884 reach the last stmt in the group. */
3885 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
3886 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt)))
3888 *vec_stmt = NULL_TREE;
3899 if (vect_print_dump_info (REPORT_DETAILS))
3900 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
3902 dr_chain = VEC_alloc (tree, heap, group_size);
3903 oprnds = VEC_alloc (tree, heap, group_size);
3905 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
3906 gcc_assert (alignment_support_cheme);
3907 gcc_assert (alignment_support_cheme == dr_aligned); /* FORNOW */
3909 /* In case the vectorization factor (VF) is bigger than the number
3910 of elements that we can fit in a vectype (nunits), we have to generate
3911 more than one vector stmt - i.e - we need to "unroll" the
3912 vector stmt by a factor VF/nunits. For more details see documentation in
3913 vect_get_vec_def_for_copy_stmt. */
3915 /* In case of interleaving (non-unit strided access):
3922 We create vectorized stores starting from base address (the access of the
3923 first stmt in the chain (S2 in the above example), when the last store stmt
3924 of the chain (S4) is reached:
3927 VS2: &base + vec_size*1 = vx0
3928 VS3: &base + vec_size*2 = vx1
3929 VS4: &base + vec_size*3 = vx3
3931 Then permutation statements are generated:
3933 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
3934 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
3937 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
3938 (the order of the data-refs in the output of vect_permute_store_chain
3939 corresponds to the order of scalar stmts in the interleaving chain - see
3940 the documentation of vect_permute_store_chain()).
3942 In case of both multiple types and interleaving, above vector stores and
3943 permutation stmts are created for every copy. The result vector stmts are
3944 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
3945 STMT_VINFO_RELATED_STMT for the next copies.
3948 prev_stmt_info = NULL;
3949 for (j = 0; j < ncopies; j++)
3956 /* For interleaved stores we collect vectorized defs for all the
3957 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then used
3958 as an input to vect_permute_store_chain(), and OPRNDS as an input
3959 to vect_get_vec_def_for_stmt_copy() for the next copy.
3960 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3961 OPRNDS are of size 1. */
3962 next_stmt = first_stmt;
3963 for (i = 0; i < group_size; i++)
3965 /* Since gaps are not supported for interleaved stores, GROUP_SIZE
3966 is the exact number of stmts in the chain. Therefore, NEXT_STMT
3967 can't be NULL_TREE. In case that there is no interleaving,
3968 GROUP_SIZE is 1, and only one iteration of the loop will be
3970 gcc_assert (next_stmt);
3971 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
3972 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt, NULL);
3973 VEC_quick_push(tree, dr_chain, vec_oprnd);
3974 VEC_quick_push(tree, oprnds, vec_oprnd);
3975 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3977 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, NULL_TREE,
3978 &dummy, &ptr_incr, false,
3979 TREE_TYPE (vec_oprnd));
3983 /* For interleaved stores we created vectorized defs for all the
3984 defs stored in OPRNDS in the previous iteration (previous copy).
3985 DR_CHAIN is then used as an input to vect_permute_store_chain(),
3986 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
3988 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
3989 OPRNDS are of size 1. */
3990 for (i = 0; i < group_size; i++)
3992 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
3993 VEC_index (tree, oprnds, i));
3994 VEC_replace(tree, dr_chain, i, vec_oprnd);
3995 VEC_replace(tree, oprnds, i, vec_oprnd);
3997 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4002 result_chain = VEC_alloc (tree, heap, group_size);
4004 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4009 next_stmt = first_stmt;
4010 for (i = 0; i < group_size; i++)
4012 /* For strided stores vectorized defs are interleaved in
4013 vect_permute_store_chain(). */
4015 vec_oprnd = VEC_index(tree, result_chain, i);
4017 data_ref = build_fold_indirect_ref (dataref_ptr);
4018 /* Arguments are ready. Create the new vector stmt. */
4019 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4020 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4022 /* Set the VDEFs for the vector pointer. If this virtual def
4023 has a use outside the loop and a loop peel is performed
4024 then the def may be renamed by the peel. Mark it for
4025 renaming so the later use will also be renamed. */
4026 copy_virtual_operands (new_stmt, next_stmt);
4029 /* The original store is deleted so the same SSA_NAMEs
4031 FOR_EACH_SSA_TREE_OPERAND (def, next_stmt, iter, SSA_OP_VDEF)
4033 SSA_NAME_DEF_STMT (def) = new_stmt;
4034 mark_sym_for_renaming (SSA_NAME_VAR (def));
4037 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4041 /* Create new names for all the definitions created by COPY and
4042 add replacement mappings for each new name. */
4043 FOR_EACH_SSA_DEF_OPERAND (def_p, new_stmt, iter, SSA_OP_VDEF)
4045 create_new_def_for (DEF_FROM_PTR (def_p), new_stmt, def_p);
4046 mark_sym_for_renaming (SSA_NAME_VAR (DEF_FROM_PTR (def_p)));
4049 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4052 prev_stmt_info = vinfo_for_stmt (new_stmt);
4053 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4056 /* Bump the vector pointer. */
4057 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4065 /* Function vect_setup_realignment
4067 This function is called when vectorizing an unaligned load using
4068 the dr_unaligned_software_pipeline scheme.
4069 This function generates the following code at the loop prolog:
4072 msq_init = *(floor(p)); # prolog load
4073 realignment_token = call target_builtin;
4075 msq = phi (msq_init, ---)
4077 The code above sets up a new (vector) pointer, pointing to the first
4078 location accessed by STMT, and a "floor-aligned" load using that pointer.
4079 It also generates code to compute the "realignment-token" (if the relevant
4080 target hook was defined), and creates a phi-node at the loop-header bb
4081 whose arguments are the result of the prolog-load (created by this
4082 function) and the result of a load that takes place in the loop (to be
4083 created by the caller to this function).
4084 The caller to this function uses the phi-result (msq) to create the
4085 realignment code inside the loop, and sets up the missing phi argument,
4089 msq = phi (msq_init, lsq)
4090 lsq = *(floor(p')); # load in loop
4091 result = realign_load (msq, lsq, realignment_token);
4094 STMT - (scalar) load stmt to be vectorized. This load accesses
4095 a memory location that may be unaligned.
4096 BSI - place where new code is to be inserted.
4099 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4100 target hook, if defined.
4101 Return value - the result of the loop-header phi node. */
4104 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4105 tree *realignment_token)
4107 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4108 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4109 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4110 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4111 edge pe = loop_preheader_edge (loop);
4112 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4125 /* 1. Create msq_init = *(floor(p1)) in the loop preheader */
4126 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4127 ptr = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &init_addr, &inc, true,
4129 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
4130 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4131 new_temp = make_ssa_name (vec_dest, new_stmt);
4132 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4133 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4134 gcc_assert (!new_bb);
4135 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
4136 copy_virtual_operands (new_stmt, stmt);
4137 update_vuses_to_preheader (new_stmt, loop);
4139 /* 2. Create permutation mask, if required, in loop preheader. */
4140 if (targetm.vectorize.builtin_mask_for_load)
4144 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4145 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
4146 vec_dest = vect_create_destination_var (scalar_dest,
4147 TREE_TYPE (new_stmt));
4148 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4149 new_temp = make_ssa_name (vec_dest, new_stmt);
4150 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4151 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
4152 gcc_assert (!new_bb);
4153 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
4155 /* The result of the CALL_EXPR to this builtin is determined from
4156 the value of the parameter and no global variables are touched
4157 which makes the builtin a "const" function. Requiring the
4158 builtin to have the "const" attribute makes it unnecessary
4159 to call mark_call_clobbered. */
4160 gcc_assert (TREE_READONLY (builtin_decl));
4163 /* 3. Create msq = phi <msq_init, lsq> in loop */
4164 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4165 msq = make_ssa_name (vec_dest, NULL_TREE);
4166 phi_stmt = create_phi_node (msq, loop->header);
4167 SSA_NAME_DEF_STMT (msq) = phi_stmt;
4168 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
4174 /* Function vect_strided_load_supported.
4176 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
4177 and FALSE otherwise. */
4180 vect_strided_load_supported (tree vectype)
4182 optab perm_even_optab, perm_odd_optab;
4185 mode = (int) TYPE_MODE (vectype);
4187 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
4188 if (!perm_even_optab)
4190 if (vect_print_dump_info (REPORT_DETAILS))
4191 fprintf (vect_dump, "no optab for perm_even.");
4195 if (perm_even_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4197 if (vect_print_dump_info (REPORT_DETAILS))
4198 fprintf (vect_dump, "perm_even op not supported by target.");
4202 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
4203 if (!perm_odd_optab)
4205 if (vect_print_dump_info (REPORT_DETAILS))
4206 fprintf (vect_dump, "no optab for perm_odd.");
4210 if (perm_odd_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4212 if (vect_print_dump_info (REPORT_DETAILS))
4213 fprintf (vect_dump, "perm_odd op not supported by target.");
4220 /* Function vect_permute_load_chain.
4222 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4223 a power of 2, generate extract_even/odd stmts to reorder the input data
4224 correctly. Return the final references for loads in RESULT_CHAIN.
4226 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4227 The input is 4 vectors each containing 8 elements. We assign a number to each
4228 element, the input sequence is:
4230 1st vec: 0 1 2 3 4 5 6 7
4231 2nd vec: 8 9 10 11 12 13 14 15
4232 3rd vec: 16 17 18 19 20 21 22 23
4233 4th vec: 24 25 26 27 28 29 30 31
4235 The output sequence should be:
4237 1st vec: 0 4 8 12 16 20 24 28
4238 2nd vec: 1 5 9 13 17 21 25 29
4239 3rd vec: 2 6 10 14 18 22 26 30
4240 4th vec: 3 7 11 15 19 23 27 31
4242 i.e., the first output vector should contain the first elements of each
4243 interleaving group, etc.
4245 We use extract_even/odd instructions to create such output. The input of each
4246 extract_even/odd operation is two vectors
4250 and the output is the vector of extracted even/odd elements. The output of
4251 extract_even will be: 0 2 4 6
4252 and of extract_odd: 1 3 5 7
4255 The permutation is done in log LENGTH stages. In each stage extract_even and
4256 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
4257 order. In our example,
4259 E1: extract_even (1st vec, 2nd vec)
4260 E2: extract_odd (1st vec, 2nd vec)
4261 E3: extract_even (3rd vec, 4th vec)
4262 E4: extract_odd (3rd vec, 4th vec)
4264 The output for the first stage will be:
4266 E1: 0 2 4 6 8 10 12 14
4267 E2: 1 3 5 7 9 11 13 15
4268 E3: 16 18 20 22 24 26 28 30
4269 E4: 17 19 21 23 25 27 29 31
4271 In order to proceed and create the correct sequence for the next stage (or
4272 for the correct output, if the second stage is the last one, as in our
4273 example), we first put the output of extract_even operation and then the
4274 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4275 The input for the second stage is:
4277 1st vec (E1): 0 2 4 6 8 10 12 14
4278 2nd vec (E3): 16 18 20 22 24 26 28 30
4279 3rd vec (E2): 1 3 5 7 9 11 13 15
4280 4th vec (E4): 17 19 21 23 25 27 29 31
4282 The output of the second stage:
4284 E1: 0 4 8 12 16 20 24 28
4285 E2: 2 6 10 14 18 22 26 30
4286 E3: 1 5 9 13 17 21 25 29
4287 E4: 3 7 11 15 19 23 27 31
4289 And RESULT_CHAIN after reordering:
4291 1st vec (E1): 0 4 8 12 16 20 24 28
4292 2nd vec (E3): 1 5 9 13 17 21 25 29
4293 3rd vec (E2): 2 6 10 14 18 22 26 30
4294 4th vec (E4): 3 7 11 15 19 23 27 31. */
4297 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4298 unsigned int length,
4300 block_stmt_iterator *bsi,
4301 VEC(tree,heap) **result_chain)
4303 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
4304 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4309 /* Check that the operation is supported. */
4310 if (!vect_strided_load_supported (vectype))
4313 *result_chain = VEC_copy (tree, heap, dr_chain);
4314 for (i = 0; i < exact_log2 (length); i++)
4316 for (j = 0; j < length; j +=2)
4318 first_vect = VEC_index (tree, dr_chain, j);
4319 second_vect = VEC_index (tree, dr_chain, j+1);
4321 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4322 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4323 DECL_GIMPLE_REG_P (perm_dest) = 1;
4324 add_referenced_var (perm_dest);
4326 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
4327 first_vect, second_vect);
4328 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4330 data_ref = make_ssa_name (perm_dest, perm_stmt);
4331 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4332 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4333 mark_symbols_for_renaming (perm_stmt);
4335 VEC_replace (tree, *result_chain, j/2, data_ref);
4337 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4338 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4339 DECL_GIMPLE_REG_P (perm_dest) = 1;
4340 add_referenced_var (perm_dest);
4342 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
4343 first_vect, second_vect);
4344 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4345 data_ref = make_ssa_name (perm_dest, perm_stmt);
4346 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
4347 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4348 mark_symbols_for_renaming (perm_stmt);
4350 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4352 dr_chain = VEC_copy (tree, heap, *result_chain);
4358 /* Function vect_transform_strided_load.
4360 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4361 to perform their permutation and ascribe the result vectorized statements to
4362 the scalar statements.
4366 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
4367 block_stmt_iterator *bsi)
4369 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4370 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4371 tree next_stmt, new_stmt;
4372 VEC(tree,heap) *result_chain = NULL;
4373 unsigned int i, gap_count;
4376 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4377 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
4378 vectors, that are ready for vector computation. */
4379 result_chain = VEC_alloc (tree, heap, size);
4381 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
4384 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4385 Since we scan the chain starting from it's first node, their order
4386 corresponds the order of data-refs in RESULT_CHAIN. */
4387 next_stmt = first_stmt;
4389 for (i = 0; VEC_iterate(tree, result_chain, i, tmp_data_ref); i++)
4394 /* Skip the gaps. Loads created for the gaps will be removed by dead
4395 code elimination pass later.
4396 DR_GROUP_GAP is the number of steps in elements from the previous
4397 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
4398 correspond to the gaps.
4400 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
4408 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4409 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4410 copies, and we put the new vector statement in the first available
4412 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4413 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4416 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4417 tree rel_stmt = STMT_VINFO_RELATED_STMT (
4418 vinfo_for_stmt (prev_stmt));
4421 prev_stmt = rel_stmt;
4422 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4424 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
4426 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4428 /* If NEXT_STMT accesses the same DR as the previous statement,
4429 put the same TMP_DATA_REF as its vectorized statement; otherwise
4430 get the next data-ref from RESULT_CHAIN. */
4431 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4439 /* vectorizable_load.
4441 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
4443 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4444 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4445 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4448 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4451 tree vec_dest = NULL;
4452 tree data_ref = NULL;
4454 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4455 stmt_vec_info prev_stmt_info;
4456 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4457 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4458 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
4459 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4462 tree new_stmt = NULL_TREE;
4464 enum dr_alignment_support alignment_support_cheme;
4465 tree dataref_ptr = NULL_TREE;
4467 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4468 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4469 int i, j, group_size;
4470 tree msq = NULL_TREE, lsq;
4471 tree offset = NULL_TREE;
4472 tree realignment_token = NULL_TREE;
4473 tree phi_stmt = NULL_TREE;
4474 VEC(tree,heap) *dr_chain = NULL;
4475 bool strided_load = false;
4478 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4481 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4484 /* FORNOW: not yet supported. */
4485 if (STMT_VINFO_LIVE_P (stmt_info))
4487 if (vect_print_dump_info (REPORT_DETAILS))
4488 fprintf (vect_dump, "value used after loop.");
4492 /* Is vectorizable load? */
4493 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4496 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4497 if (TREE_CODE (scalar_dest) != SSA_NAME)
4500 op = GIMPLE_STMT_OPERAND (stmt, 1);
4501 if (TREE_CODE (op) != ARRAY_REF
4502 && TREE_CODE (op) != INDIRECT_REF
4503 && !DR_GROUP_FIRST_DR (stmt_info))
4506 if (!STMT_VINFO_DATA_REF (stmt_info))
4509 mode = (int) TYPE_MODE (vectype);
4511 /* FORNOW. In some cases can vectorize even if data-type not supported
4512 (e.g. - data copies). */
4513 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
4515 if (vect_print_dump_info (REPORT_DETAILS))
4516 fprintf (vect_dump, "Aligned load, but unsupported type.");
4520 /* Check if the load is a part of an interleaving chain. */
4521 if (DR_GROUP_FIRST_DR (stmt_info))
4523 strided_load = true;
4525 /* Check if interleaving is supported. */
4526 if (!vect_strided_load_supported (vectype))
4530 if (!vec_stmt) /* transformation not required. */
4532 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
4533 vect_model_load_cost (stmt_info, ncopies);
4537 if (vect_print_dump_info (REPORT_DETAILS))
4538 fprintf (vect_dump, "transform load.");
4544 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4545 /* Check if the chain of loads is already vectorized. */
4546 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
4548 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4551 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4552 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4553 dr_chain = VEC_alloc (tree, heap, group_size);
4562 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
4563 gcc_assert (alignment_support_cheme);
4566 /* In case the vectorization factor (VF) is bigger than the number
4567 of elements that we can fit in a vectype (nunits), we have to generate
4568 more than one vector stmt - i.e - we need to "unroll" the
4569 vector stmt by a factor VF/nunits. In doing so, we record a pointer
4570 from one copy of the vector stmt to the next, in the field
4571 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
4572 stages to find the correct vector defs to be used when vectorizing
4573 stmts that use the defs of the current stmt. The example below illustrates
4574 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
4575 4 vectorized stmts):
4577 before vectorization:
4578 RELATED_STMT VEC_STMT
4582 step 1: vectorize stmt S1:
4583 We first create the vector stmt VS1_0, and, as usual, record a
4584 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
4585 Next, we create the vector stmt VS1_1, and record a pointer to
4586 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
4587 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
4589 RELATED_STMT VEC_STMT
4590 VS1_0: vx0 = memref0 VS1_1 -
4591 VS1_1: vx1 = memref1 VS1_2 -
4592 VS1_2: vx2 = memref2 VS1_3 -
4593 VS1_3: vx3 = memref3 - -
4594 S1: x = load - VS1_0
4597 See in documentation in vect_get_vec_def_for_stmt_copy for how the
4598 information we recorded in RELATED_STMT field is used to vectorize
4601 /* In case of interleaving (non-unit strided access):
4608 Vectorized loads are created in the order of memory accesses
4609 starting from the access of the first stmt of the chain:
4612 VS2: vx1 = &base + vec_size*1
4613 VS3: vx3 = &base + vec_size*2
4614 VS4: vx4 = &base + vec_size*3
4616 Then permutation statements are generated:
4618 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
4619 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
4622 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4623 (the order of the data-refs in the output of vect_permute_load_chain
4624 corresponds to the order of scalar stmts in the interleaving chain - see
4625 the documentation of vect_permute_load_chain()).
4626 The generation of permutation stmts and recording them in
4627 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
4629 In case of both multiple types and interleaving, the vector loads and
4630 permutation stmts above are created for every copy. The result vector stmts
4631 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4632 STMT_VINFO_RELATED_STMT for the next copies. */
4634 /* If the data reference is aligned (dr_aligned) or potentially unaligned
4635 on a target that supports unaligned accesses (dr_unaligned_supported)
4636 we generate the following code:
4640 p = p + indx * vectype_size;
4645 Otherwise, the data reference is potentially unaligned on a target that
4646 does not support unaligned accesses (dr_unaligned_software_pipeline) -
4647 then generate the following code, in which the data in each iteration is
4648 obtained by two vector loads, one from the previous iteration, and one
4649 from the current iteration:
4651 msq_init = *(floor(p1))
4652 p2 = initial_addr + VS - 1;
4653 realignment_token = call target_builtin;
4656 p2 = p2 + indx * vectype_size
4658 vec_dest = realign_load (msq, lsq, realignment_token)
4663 if (alignment_support_cheme == dr_unaligned_software_pipeline)
4665 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token);
4666 phi_stmt = SSA_NAME_DEF_STMT (msq);
4667 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
4670 prev_stmt_info = NULL;
4671 for (j = 0; j < ncopies; j++)
4673 /* 1. Create the vector pointer update chain. */
4675 dataref_ptr = vect_create_data_ref_ptr (first_stmt, bsi, offset, &dummy,
4676 &ptr_incr, false, NULL_TREE);
4678 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4680 for (i = 0; i < group_size; i++)
4682 /* 2. Create the vector-load in the loop. */
4683 switch (alignment_support_cheme)
4686 gcc_assert (aligned_access_p (first_dr));
4687 data_ref = build_fold_indirect_ref (dataref_ptr);
4689 case dr_unaligned_supported:
4691 int mis = DR_MISALIGNMENT (first_dr);
4692 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
4694 gcc_assert (!aligned_access_p (first_dr));
4695 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
4697 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
4700 case dr_unaligned_software_pipeline:
4701 gcc_assert (!aligned_access_p (first_dr));
4702 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
4707 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4708 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
4709 new_temp = make_ssa_name (vec_dest, new_stmt);
4710 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4711 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4712 copy_virtual_operands (new_stmt, stmt);
4713 mark_symbols_for_renaming (new_stmt);
4715 /* 3. Handle explicit realignment if necessary/supported. */
4716 if (alignment_support_cheme == dr_unaligned_software_pipeline)
4719 <vec_dest = realign_load (msq, lsq, realignment_token)> */
4720 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
4721 if (!realignment_token)
4722 realignment_token = dataref_ptr;
4723 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4725 build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, realignment_token);
4726 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
4727 new_temp = make_ssa_name (vec_dest, new_stmt);
4728 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4729 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4730 if (i == group_size - 1 && j == ncopies - 1)
4731 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
4735 VEC_quick_push (tree, dr_chain, new_temp);
4736 if (i < group_size - 1)
4737 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt);
4742 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
4744 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4745 dr_chain = VEC_alloc (tree, heap, group_size);
4750 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4752 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4753 prev_stmt_info = vinfo_for_stmt (new_stmt);
4761 /* Function vectorizable_live_operation.
4763 STMT computes a value that is used outside the loop. Check if
4764 it can be supported. */
4767 vectorizable_live_operation (tree stmt,
4768 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
4769 tree *vec_stmt ATTRIBUTE_UNUSED)
4772 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4773 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4778 enum vect_def_type dt;
4780 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
4782 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
4785 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4788 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4791 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4792 op_type = TREE_OPERAND_LENGTH (operation);
4794 /* FORNOW: support only if all uses are invariant. This means
4795 that the scalar operations can remain in place, unvectorized.
4796 The original last scalar value that they compute will be used. */
4798 for (i = 0; i < op_type; i++)
4800 op = TREE_OPERAND (operation, i);
4801 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4803 if (vect_print_dump_info (REPORT_DETAILS))
4804 fprintf (vect_dump, "use not simple.");
4808 if (dt != vect_invariant_def && dt != vect_constant_def)
4812 /* No transformation is required for the cases we currently support. */
4817 /* Function vect_is_simple_cond.
4820 LOOP - the loop that is being vectorized.
4821 COND - Condition that is checked for simple use.
4823 Returns whether a COND can be vectorized. Checks whether
4824 condition operands are supportable using vec_is_simple_use. */
4827 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
4831 enum vect_def_type dt;
4833 if (!COMPARISON_CLASS_P (cond))
4836 lhs = TREE_OPERAND (cond, 0);
4837 rhs = TREE_OPERAND (cond, 1);
4839 if (TREE_CODE (lhs) == SSA_NAME)
4841 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
4842 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
4845 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST)
4848 if (TREE_CODE (rhs) == SSA_NAME)
4850 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
4851 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
4854 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST)
4860 /* vectorizable_condition.
4862 Check if STMT is conditional modify expression that can be vectorized.
4863 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4864 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
4867 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4870 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
4872 tree scalar_dest = NULL_TREE;
4873 tree vec_dest = NULL_TREE;
4874 tree op = NULL_TREE;
4875 tree cond_expr, then_clause, else_clause;
4876 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4877 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4878 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
4879 tree vec_compare, vec_cond_expr;
4881 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4882 enum machine_mode vec_mode;
4884 enum vect_def_type dt;
4885 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4886 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4888 gcc_assert (ncopies >= 1);
4890 return false; /* FORNOW */
4892 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4895 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4898 /* FORNOW: not yet supported. */
4899 if (STMT_VINFO_LIVE_P (stmt_info))
4901 if (vect_print_dump_info (REPORT_DETAILS))
4902 fprintf (vect_dump, "value used after loop.");
4906 /* Is vectorizable conditional operation? */
4907 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4910 op = GIMPLE_STMT_OPERAND (stmt, 1);
4912 if (TREE_CODE (op) != COND_EXPR)
4915 cond_expr = TREE_OPERAND (op, 0);
4916 then_clause = TREE_OPERAND (op, 1);
4917 else_clause = TREE_OPERAND (op, 2);
4919 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
4922 /* We do not handle two different vector types for the condition
4924 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
4927 if (TREE_CODE (then_clause) == SSA_NAME)
4929 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
4930 if (!vect_is_simple_use (then_clause, loop_vinfo,
4931 &then_def_stmt, &def, &dt))
4934 else if (TREE_CODE (then_clause) != INTEGER_CST
4935 && TREE_CODE (then_clause) != REAL_CST)
4938 if (TREE_CODE (else_clause) == SSA_NAME)
4940 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
4941 if (!vect_is_simple_use (else_clause, loop_vinfo,
4942 &else_def_stmt, &def, &dt))
4945 else if (TREE_CODE (else_clause) != INTEGER_CST
4946 && TREE_CODE (else_clause) != REAL_CST)
4950 vec_mode = TYPE_MODE (vectype);
4954 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
4955 return expand_vec_cond_expr_p (op, vec_mode);
4961 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4962 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4964 /* Handle cond expr. */
4966 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
4968 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
4969 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
4970 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
4972 /* Arguments are ready. create the new vector stmt. */
4973 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
4974 vec_cond_lhs, vec_cond_rhs);
4975 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
4976 vec_compare, vec_then_clause, vec_else_clause);
4978 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
4979 new_temp = make_ssa_name (vec_dest, *vec_stmt);
4980 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
4981 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
4986 /* Function vect_transform_stmt.
4988 Create a vectorized stmt to replace STMT, and insert it at BSI. */
4991 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store)
4993 bool is_store = false;
4994 tree vec_stmt = NULL_TREE;
4995 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4996 tree orig_stmt_in_pattern;
4999 switch (STMT_VINFO_TYPE (stmt_info))
5001 case type_demotion_vec_info_type:
5002 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
5006 case type_promotion_vec_info_type:
5007 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
5011 case type_conversion_vec_info_type:
5012 done = vectorizable_conversion (stmt, bsi, &vec_stmt);
5016 case induc_vec_info_type:
5017 done = vectorizable_induction (stmt, bsi, &vec_stmt);
5021 case op_vec_info_type:
5022 done = vectorizable_operation (stmt, bsi, &vec_stmt);
5026 case assignment_vec_info_type:
5027 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
5031 case load_vec_info_type:
5032 done = vectorizable_load (stmt, bsi, &vec_stmt);
5036 case store_vec_info_type:
5037 done = vectorizable_store (stmt, bsi, &vec_stmt);
5039 if (DR_GROUP_FIRST_DR (stmt_info))
5041 /* In case of interleaving, the whole chain is vectorized when the
5042 last store in the chain is reached. Store stmts before the last
5043 one are skipped, and there vec_stmt_info shouldn't be freed
5045 *strided_store = true;
5046 if (STMT_VINFO_VEC_STMT (stmt_info))
5053 case condition_vec_info_type:
5054 done = vectorizable_condition (stmt, bsi, &vec_stmt);
5058 case call_vec_info_type:
5059 done = vectorizable_call (stmt, bsi, &vec_stmt);
5062 case reduc_vec_info_type:
5063 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
5068 if (!STMT_VINFO_LIVE_P (stmt_info))
5070 if (vect_print_dump_info (REPORT_DETAILS))
5071 fprintf (vect_dump, "stmt not supported.");
5076 if (STMT_VINFO_LIVE_P (stmt_info)
5077 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
5079 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
5085 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
5086 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
5087 if (orig_stmt_in_pattern)
5089 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
5090 /* STMT was inserted by the vectorizer to replace a computation idiom.
5091 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
5092 computed this idiom. We need to record a pointer to VEC_STMT in
5093 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
5094 documentation of vect_pattern_recog. */
5095 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
5097 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
5098 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
5107 /* This function builds ni_name = number of iterations loop executes
5108 on the loop preheader. */
5111 vect_build_loop_niters (loop_vec_info loop_vinfo)
5113 tree ni_name, stmt, var;
5115 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5116 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
5118 var = create_tmp_var (TREE_TYPE (ni), "niters");
5119 add_referenced_var (var);
5120 ni_name = force_gimple_operand (ni, &stmt, false, var);
5122 pe = loop_preheader_edge (loop);
5125 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5126 gcc_assert (!new_bb);
5133 /* This function generates the following statements:
5135 ni_name = number of iterations loop executes
5136 ratio = ni_name / vf
5137 ratio_mult_vf_name = ratio * vf
5139 and places them at the loop preheader edge. */
5142 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
5144 tree *ratio_mult_vf_name_ptr,
5145 tree *ratio_name_ptr)
5153 tree ratio_mult_vf_name;
5154 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5155 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
5156 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5159 pe = loop_preheader_edge (loop);
5161 /* Generate temporary variable that contains
5162 number of iterations loop executes. */
5164 ni_name = vect_build_loop_niters (loop_vinfo);
5165 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
5167 /* Create: ratio = ni >> log2(vf) */
5169 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
5170 if (!is_gimple_val (ratio_name))
5172 var = create_tmp_var (TREE_TYPE (ni), "bnd");
5173 add_referenced_var (var);
5175 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
5176 pe = loop_preheader_edge (loop);
5177 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5178 gcc_assert (!new_bb);
5181 /* Create: ratio_mult_vf = ratio << log2 (vf). */
5183 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
5184 ratio_name, log_vf);
5185 if (!is_gimple_val (ratio_mult_vf_name))
5187 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
5188 add_referenced_var (var);
5190 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
5192 pe = loop_preheader_edge (loop);
5193 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5194 gcc_assert (!new_bb);
5197 *ni_name_ptr = ni_name;
5198 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
5199 *ratio_name_ptr = ratio_name;
5205 /* Function update_vuses_to_preheader.
5208 STMT - a statement with potential VUSEs.
5209 LOOP - the loop whose preheader will contain STMT.
5211 It's possible to vectorize a loop even though an SSA_NAME from a VUSE
5212 appears to be defined in a VDEF in another statement in a loop.
5213 One such case is when the VUSE is at the dereference of a __restricted__
5214 pointer in a load and the VDEF is at the dereference of a different
5215 __restricted__ pointer in a store. Vectorization may result in
5216 copy_virtual_uses being called to copy the problematic VUSE to a new
5217 statement that is being inserted in the loop preheader. This procedure
5218 is called to change the SSA_NAME in the new statement's VUSE from the
5219 SSA_NAME updated in the loop to the related SSA_NAME available on the
5220 path entering the loop.
5222 When this function is called, we have the following situation:
5227 # name1 = phi < name0 , name2>
5232 # name2 = vdef <name1>
5237 Stmt S1 was created in the loop preheader block as part of misaligned-load
5238 handling. This function fixes the name of the vuse of S1 from 'name1' to
5242 update_vuses_to_preheader (tree stmt, struct loop *loop)
5244 basic_block header_bb = loop->header;
5245 edge preheader_e = loop_preheader_edge (loop);
5247 use_operand_p use_p;
5249 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_VUSE)
5251 tree ssa_name = USE_FROM_PTR (use_p);
5252 tree def_stmt = SSA_NAME_DEF_STMT (ssa_name);
5253 tree name_var = SSA_NAME_VAR (ssa_name);
5254 basic_block bb = bb_for_stmt (def_stmt);
5256 /* For a use before any definitions, def_stmt is a NOP_EXPR. */
5257 if (!IS_EMPTY_STMT (def_stmt)
5258 && flow_bb_inside_loop_p (loop, bb))
5260 /* If the block containing the statement defining the SSA_NAME
5261 is in the loop then it's necessary to find the definition
5262 outside the loop using the PHI nodes of the header. */
5264 bool updated = false;
5266 for (phi = phi_nodes (header_bb); phi; phi = PHI_CHAIN (phi))
5268 if (SSA_NAME_VAR (PHI_RESULT (phi)) == name_var)
5270 SET_USE (use_p, PHI_ARG_DEF (phi, preheader_e->dest_idx));
5275 gcc_assert (updated);
5281 /* Function vect_update_ivs_after_vectorizer.
5283 "Advance" the induction variables of LOOP to the value they should take
5284 after the execution of LOOP. This is currently necessary because the
5285 vectorizer does not handle induction variables that are used after the
5286 loop. Such a situation occurs when the last iterations of LOOP are
5288 1. We introduced new uses after LOOP for IVs that were not originally used
5289 after LOOP: the IVs of LOOP are now used by an epilog loop.
5290 2. LOOP is going to be vectorized; this means that it will iterate N/VF
5291 times, whereas the loop IVs should be bumped N times.
5294 - LOOP - a loop that is going to be vectorized. The last few iterations
5295 of LOOP were peeled.
5296 - NITERS - the number of iterations that LOOP executes (before it is
5297 vectorized). i.e, the number of times the ivs should be bumped.
5298 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
5299 coming out from LOOP on which there are uses of the LOOP ivs
5300 (this is the path from LOOP->exit to epilog_loop->preheader).
5302 The new definitions of the ivs are placed in LOOP->exit.
5303 The phi args associated with the edge UPDATE_E in the bb
5304 UPDATE_E->dest are updated accordingly.
5306 Assumption 1: Like the rest of the vectorizer, this function assumes
5307 a single loop exit that has a single predecessor.
5309 Assumption 2: The phi nodes in the LOOP header and in update_bb are
5310 organized in the same order.
5312 Assumption 3: The access function of the ivs is simple enough (see
5313 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
5315 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
5316 coming out of LOOP on which the ivs of LOOP are used (this is the path
5317 that leads to the epilog loop; other paths skip the epilog loop). This
5318 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
5319 needs to have its phis updated.
5323 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
5326 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5327 basic_block exit_bb = single_exit (loop)->dest;
5329 basic_block update_bb = update_e->dest;
5331 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
5333 /* Make sure there exists a single-predecessor exit bb: */
5334 gcc_assert (single_pred_p (exit_bb));
5336 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
5338 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
5340 tree access_fn = NULL;
5341 tree evolution_part;
5344 tree var, stmt, ni, ni_name;
5345 block_stmt_iterator last_bsi;
5347 if (vect_print_dump_info (REPORT_DETAILS))
5349 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
5350 print_generic_expr (vect_dump, phi, TDF_SLIM);
5353 /* Skip virtual phi's. */
5354 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
5356 if (vect_print_dump_info (REPORT_DETAILS))
5357 fprintf (vect_dump, "virtual phi. skip.");
5361 /* Skip reduction phis. */
5362 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
5364 if (vect_print_dump_info (REPORT_DETAILS))
5365 fprintf (vect_dump, "reduc phi. skip.");
5369 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
5370 gcc_assert (access_fn);
5372 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
5373 gcc_assert (evolution_part != NULL_TREE);
5375 /* FORNOW: We do not support IVs whose evolution function is a polynomial
5376 of degree >= 2 or exponential. */
5377 gcc_assert (!tree_is_chrec (evolution_part));
5379 step_expr = evolution_part;
5380 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
5383 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
5384 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
5386 fold_convert (sizetype,
5387 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
5388 niters, step_expr)));
5390 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
5391 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
5392 fold_convert (TREE_TYPE (init_expr),
5399 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
5400 add_referenced_var (var);
5402 ni_name = force_gimple_operand (ni, &stmt, false, var);
5404 /* Insert stmt into exit_bb. */
5405 last_bsi = bsi_last (exit_bb);
5407 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
5409 /* Fix phi expressions in the successor bb. */
5410 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
5415 /* Function vect_do_peeling_for_loop_bound
5417 Peel the last iterations of the loop represented by LOOP_VINFO.
5418 The peeled iterations form a new epilog loop. Given that the loop now
5419 iterates NITERS times, the new epilog loop iterates
5420 NITERS % VECTORIZATION_FACTOR times.
5422 The original loop will later be made to iterate
5423 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
5426 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
5428 tree ni_name, ratio_mult_vf_name;
5429 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5430 struct loop *new_loop;
5432 basic_block preheader;
5435 int min_scalar_loop_bound;
5436 int min_profitable_iters;
5438 if (vect_print_dump_info (REPORT_DETAILS))
5439 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
5441 initialize_original_copy_tables ();
5443 /* Generate the following variables on the preheader of original loop:
5445 ni_name = number of iteration the original loop executes
5446 ratio = ni_name / vf
5447 ratio_mult_vf_name = ratio * vf */
5448 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
5449 &ratio_mult_vf_name, ratio);
5451 loop_num = loop->num;
5453 /* Analyze cost to set threshhold for vectorized loop. */
5454 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
5455 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
5456 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5458 /* Use the cost model only if it is more conservative than user specified
5461 th = (unsigned) min_scalar_loop_bound;
5462 if (min_profitable_iters
5463 && (!min_scalar_loop_bound
5464 || min_profitable_iters > min_scalar_loop_bound))
5465 th = (unsigned) min_profitable_iters;
5467 if (min_profitable_iters
5468 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5469 && vect_print_dump_info (REPORT_DETAILS))
5470 fprintf (vect_dump, "vectorization may not be profitable.");
5472 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
5473 ratio_mult_vf_name, ni_name, false,
5475 gcc_assert (new_loop);
5476 gcc_assert (loop_num == loop->num);
5477 #ifdef ENABLE_CHECKING
5478 slpeel_verify_cfg_after_peeling (loop, new_loop);
5481 /* A guard that controls whether the new_loop is to be executed or skipped
5482 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
5483 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
5484 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
5485 is on the path where the LOOP IVs are used and need to be updated. */
5487 preheader = loop_preheader_edge (new_loop)->src;
5488 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
5489 update_e = EDGE_PRED (preheader, 0);
5491 update_e = EDGE_PRED (preheader, 1);
5493 /* Update IVs of original loop as if they were advanced
5494 by ratio_mult_vf_name steps. */
5495 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
5497 /* After peeling we have to reset scalar evolution analyzer. */
5500 free_original_copy_tables ();
5504 /* Function vect_gen_niters_for_prolog_loop
5506 Set the number of iterations for the loop represented by LOOP_VINFO
5507 to the minimum between LOOP_NITERS (the original iteration count of the loop)
5508 and the misalignment of DR - the data reference recorded in
5509 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
5510 this loop, the data reference DR will refer to an aligned location.
5512 The following computation is generated:
5514 If the misalignment of DR is known at compile time:
5515 addr_mis = int mis = DR_MISALIGNMENT (dr);
5516 Else, compute address misalignment in bytes:
5517 addr_mis = addr & (vectype_size - 1)
5519 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
5521 (elem_size = element type size; an element is the scalar element
5522 whose type is the inner type of the vectype)
5526 prolog_niters = min ( LOOP_NITERS ,
5527 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
5528 where group_size is the size of the interleaved group.
5530 The above formulas assume that VF == number of elements in the vector. This
5531 may not hold when there are multiple-types in the loop.
5532 In this case, for some data-references in the loop the VF does not represent
5533 the number of elements that fit in the vector. Therefore, instead of VF we
5534 use TYPE_VECTOR_SUBPARTS. */
5537 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
5539 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
5540 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5542 tree iters, iters_name;
5545 tree dr_stmt = DR_STMT (dr);
5546 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
5547 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5548 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
5549 tree niters_type = TREE_TYPE (loop_niters);
5551 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
5552 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
5554 if (DR_GROUP_FIRST_DR (stmt_info))
5556 /* For interleaved access element size must be multiplied by the size of
5557 the interleaved group. */
5558 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
5559 DR_GROUP_FIRST_DR (stmt_info)));
5560 element_size *= group_size;
5563 pe = loop_preheader_edge (loop);
5565 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
5567 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
5568 int elem_misalign = byte_misalign / element_size;
5570 if (vect_print_dump_info (REPORT_DETAILS))
5571 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
5572 iters = build_int_cst (niters_type,
5573 (nelements - elem_misalign)&(nelements/group_size-1));
5577 tree new_stmts = NULL_TREE;
5579 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
5580 tree ptr_type = TREE_TYPE (start_addr);
5581 tree size = TYPE_SIZE (ptr_type);
5582 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
5583 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
5584 tree elem_size_log =
5585 build_int_cst (type, exact_log2 (vectype_align/nelements));
5586 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
5587 tree nelements_tree = build_int_cst (type, nelements);
5591 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
5592 gcc_assert (!new_bb);
5594 /* Create: byte_misalign = addr & (vectype_size - 1) */
5596 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
5598 /* Create: elem_misalign = byte_misalign / element_size */
5600 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
5602 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
5603 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
5604 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
5605 iters = fold_convert (niters_type, iters);
5608 /* Create: prolog_loop_niters = min (iters, loop_niters) */
5609 /* If the loop bound is known at compile time we already verified that it is
5610 greater than vf; since the misalignment ('iters') is at most vf, there's
5611 no need to generate the MIN_EXPR in this case. */
5612 if (TREE_CODE (loop_niters) != INTEGER_CST)
5613 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
5615 if (vect_print_dump_info (REPORT_DETAILS))
5617 fprintf (vect_dump, "niters for prolog loop: ");
5618 print_generic_expr (vect_dump, iters, TDF_SLIM);
5621 var = create_tmp_var (niters_type, "prolog_loop_niters");
5622 add_referenced_var (var);
5623 iters_name = force_gimple_operand (iters, &stmt, false, var);
5625 /* Insert stmt on loop preheader edge. */
5628 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
5629 gcc_assert (!new_bb);
5636 /* Function vect_update_init_of_dr
5638 NITERS iterations were peeled from LOOP. DR represents a data reference
5639 in LOOP. This function updates the information recorded in DR to
5640 account for the fact that the first NITERS iterations had already been
5641 executed. Specifically, it updates the OFFSET field of DR. */
5644 vect_update_init_of_dr (struct data_reference *dr, tree niters)
5646 tree offset = DR_OFFSET (dr);
5648 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
5649 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
5650 DR_OFFSET (dr) = offset;
5654 /* Function vect_update_inits_of_drs
5656 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
5657 This function updates the information recorded for the data references in
5658 the loop to account for the fact that the first NITERS iterations had
5659 already been executed. Specifically, it updates the initial_condition of
5660 the access_function of all the data_references in the loop. */
5663 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
5666 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
5667 struct data_reference *dr;
5669 if (vect_print_dump_info (REPORT_DETAILS))
5670 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
5672 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
5673 vect_update_init_of_dr (dr, niters);
5677 /* Function vect_do_peeling_for_alignment
5679 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
5680 'niters' is set to the misalignment of one of the data references in the
5681 loop, thereby forcing it to refer to an aligned location at the beginning
5682 of the execution of this loop. The data reference for which we are
5683 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
5686 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
5688 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5689 tree niters_of_prolog_loop, ni_name;
5691 struct loop *new_loop;
5693 if (vect_print_dump_info (REPORT_DETAILS))
5694 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
5696 initialize_original_copy_tables ();
5698 ni_name = vect_build_loop_niters (loop_vinfo);
5699 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
5701 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
5703 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
5704 niters_of_prolog_loop, ni_name, true, 0);
5705 gcc_assert (new_loop);
5706 #ifdef ENABLE_CHECKING
5707 slpeel_verify_cfg_after_peeling (new_loop, loop);
5710 /* Update number of times loop executes. */
5711 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
5712 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
5713 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
5715 /* Update the init conditions of the access functions of all data refs. */
5716 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
5718 /* After peeling we have to reset scalar evolution analyzer. */
5721 free_original_copy_tables ();
5725 /* Function vect_create_cond_for_align_checks.
5727 Create a conditional expression that represents the alignment checks for
5728 all of data references (array element references) whose alignment must be
5732 LOOP_VINFO - two fields of the loop information are used.
5733 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
5734 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
5737 COND_EXPR_STMT_LIST - statements needed to construct the conditional
5739 The returned value is the conditional expression to be used in the if
5740 statement that controls which version of the loop gets executed at runtime.
5742 The algorithm makes two assumptions:
5743 1) The number of bytes "n" in a vector is a power of 2.
5744 2) An address "a" is aligned if a%n is zero and that this
5745 test can be done as a&(n-1) == 0. For example, for 16
5746 byte vectors the test is a&0xf == 0. */
5749 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
5750 tree *cond_expr_stmt_list)
5752 VEC(tree,heap) *may_misalign_stmts
5753 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
5755 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
5759 tree int_ptrsize_type;
5761 tree or_tmp_name = NULL_TREE;
5762 tree and_tmp, and_tmp_name, and_stmt;
5765 /* Check that mask is one less than a power of 2, i.e., mask is
5766 all zeros followed by all ones. */
5767 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
5769 /* CHECKME: what is the best integer or unsigned type to use to hold a
5770 cast from a pointer value? */
5771 psize = TYPE_SIZE (ptr_type_node);
5773 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
5775 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
5776 of the first vector of the i'th data reference. */
5778 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
5780 tree new_stmt_list = NULL_TREE;
5782 tree addr_tmp, addr_tmp_name, addr_stmt;
5783 tree or_tmp, new_or_tmp_name, or_stmt;
5785 /* create: addr_tmp = (int)(address_of_first_vector) */
5786 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
5790 if (new_stmt_list != NULL_TREE)
5791 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
5793 sprintf (tmp_name, "%s%d", "addr2int", i);
5794 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5795 add_referenced_var (addr_tmp);
5796 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
5797 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
5798 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
5799 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
5800 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
5802 /* The addresses are OR together. */
5804 if (or_tmp_name != NULL_TREE)
5806 /* create: or_tmp = or_tmp | addr_tmp */
5807 sprintf (tmp_name, "%s%d", "orptrs", i);
5808 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
5809 add_referenced_var (or_tmp);
5810 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
5811 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
5812 or_tmp_name, addr_tmp_name);
5813 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
5814 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
5815 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
5816 or_tmp_name = new_or_tmp_name;
5819 or_tmp_name = addr_tmp_name;
5823 mask_cst = build_int_cst (int_ptrsize_type, mask);
5825 /* create: and_tmp = or_tmp & mask */
5826 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
5827 add_referenced_var (and_tmp);
5828 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
5830 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
5831 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
5832 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
5833 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
5835 /* Make and_tmp the left operand of the conditional test against zero.
5836 if and_tmp has a nonzero bit then some address is unaligned. */
5837 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
5838 return build2 (EQ_EXPR, boolean_type_node,
5839 and_tmp_name, ptrsize_zero);
5843 /* Function vect_transform_loop.
5845 The analysis phase has determined that the loop is vectorizable.
5846 Vectorize the loop - created vectorized stmts to replace the scalar
5847 stmts in the loop, and update the loop exit condition. */
5850 vect_transform_loop (loop_vec_info loop_vinfo)
5852 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5853 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
5854 int nbbs = loop->num_nodes;
5855 block_stmt_iterator si, next_si;
5858 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
5861 if (vect_print_dump_info (REPORT_DETAILS))
5862 fprintf (vect_dump, "=== vec_transform_loop ===");
5864 /* If the loop has data references that may or may not be aligned then
5865 two versions of the loop need to be generated, one which is vectorized
5866 and one which isn't. A test is then generated to control which of the
5867 loops is executed. The test checks for the alignment of all of the
5868 data references that may or may not be aligned. */
5870 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
5874 tree cond_expr_stmt_list = NULL_TREE;
5875 basic_block condition_bb;
5876 block_stmt_iterator cond_exp_bsi;
5877 basic_block merge_bb;
5878 basic_block new_exit_bb;
5880 tree orig_phi, new_phi, arg;
5881 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
5883 cond_expr = vect_create_cond_for_align_checks (loop_vinfo,
5884 &cond_expr_stmt_list);
5885 initialize_original_copy_tables ();
5886 nloop = loop_version (loop, cond_expr, &condition_bb,
5887 prob, prob, REG_BR_PROB_BASE - prob, true);
5888 free_original_copy_tables();
5890 /** Loop versioning violates an assumption we try to maintain during
5891 vectorization - that the loop exit block has a single predecessor.
5892 After versioning, the exit block of both loop versions is the same
5893 basic block (i.e. it has two predecessors). Just in order to simplify
5894 following transformations in the vectorizer, we fix this situation
5895 here by adding a new (empty) block on the exit-edge of the loop,
5896 with the proper loop-exit phis to maintain loop-closed-form. **/
5898 merge_bb = single_exit (loop)->dest;
5899 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
5900 new_exit_bb = split_edge (single_exit (loop));
5901 new_exit_e = single_exit (loop);
5902 e = EDGE_SUCC (new_exit_bb, 0);
5904 for (orig_phi = phi_nodes (merge_bb); orig_phi;
5905 orig_phi = PHI_CHAIN (orig_phi))
5907 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
5909 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
5910 add_phi_arg (new_phi, arg, new_exit_e);
5911 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
5914 /** end loop-exit-fixes after versioning **/
5916 update_ssa (TODO_update_ssa);
5917 cond_exp_bsi = bsi_last (condition_bb);
5918 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
5921 /* CHECKME: we wouldn't need this if we called update_ssa once
5923 bitmap_zero (vect_memsyms_to_rename);
5925 /* Peel the loop if there are data refs with unknown alignment.
5926 Only one data ref with unknown store is allowed. */
5928 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
5929 vect_do_peeling_for_alignment (loop_vinfo);
5931 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
5932 compile time constant), or it is a constant that doesn't divide by the
5933 vectorization factor, then an epilog loop needs to be created.
5934 We therefore duplicate the loop: the original loop will be vectorized,
5935 and will compute the first (n/VF) iterations. The second copy of the loop
5936 will remain scalar and will compute the remaining (n%VF) iterations.
5937 (VF is the vectorization factor). */
5939 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5940 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
5941 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
5942 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
5944 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
5945 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
5947 /* 1) Make sure the loop header has exactly two entries
5948 2) Make sure we have a preheader basic block. */
5950 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
5952 split_edge (loop_preheader_edge (loop));
5954 /* FORNOW: the vectorizer supports only loops which body consist
5955 of one basic block (header + empty latch). When the vectorizer will
5956 support more involved loop forms, the order by which the BBs are
5957 traversed need to be reconsidered. */
5959 for (i = 0; i < nbbs; i++)
5961 basic_block bb = bbs[i];
5962 stmt_vec_info stmt_info;
5965 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
5967 if (vect_print_dump_info (REPORT_DETAILS))
5969 fprintf (vect_dump, "------>vectorizing phi: ");
5970 print_generic_expr (vect_dump, phi, TDF_SLIM);
5972 stmt_info = vinfo_for_stmt (phi);
5975 if (!STMT_VINFO_RELEVANT_P (stmt_info)
5976 && !STMT_VINFO_LIVE_P (stmt_info))
5979 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
5980 != (unsigned HOST_WIDE_INT) vectorization_factor)
5981 && vect_print_dump_info (REPORT_DETAILS))
5982 fprintf (vect_dump, "multiple-types.");
5984 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
5986 if (vect_print_dump_info (REPORT_DETAILS))
5987 fprintf (vect_dump, "transform phi.");
5988 vect_transform_stmt (phi, NULL, NULL);
5992 for (si = bsi_start (bb); !bsi_end_p (si);)
5994 tree stmt = bsi_stmt (si);
5997 if (vect_print_dump_info (REPORT_DETAILS))
5999 fprintf (vect_dump, "------>vectorizing statement: ");
6000 print_generic_expr (vect_dump, stmt, TDF_SLIM);
6002 stmt_info = vinfo_for_stmt (stmt);
6003 gcc_assert (stmt_info);
6004 if (!STMT_VINFO_RELEVANT_P (stmt_info)
6005 && !STMT_VINFO_LIVE_P (stmt_info))
6011 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
6012 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
6013 != (unsigned HOST_WIDE_INT) vectorization_factor)
6014 && vect_print_dump_info (REPORT_DETAILS))
6015 fprintf (vect_dump, "multiple-types.");
6017 /* -------- vectorize statement ------------ */
6018 if (vect_print_dump_info (REPORT_DETAILS))
6019 fprintf (vect_dump, "transform statement.");
6021 strided_store = false;
6022 is_store = vect_transform_stmt (stmt, &si, &strided_store);
6026 if (DR_GROUP_FIRST_DR (stmt_info))
6028 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
6029 interleaving chain was completed - free all the stores in
6031 tree next = DR_GROUP_FIRST_DR (stmt_info);
6033 stmt_vec_info next_stmt_info;
6037 next_si = bsi_for_stmt (next);
6038 next_stmt_info = vinfo_for_stmt (next);
6039 /* Free the attached stmt_vec_info and remove the stmt. */
6040 ann = stmt_ann (next);
6041 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
6042 free (next_stmt_info);
6043 set_stmt_info (ann, NULL);
6044 bsi_remove (&next_si, true);
6047 bsi_remove (&si, true);
6052 /* Free the attached stmt_vec_info and remove the stmt. */
6053 ann = stmt_ann (stmt);
6055 set_stmt_info (ann, NULL);
6056 bsi_remove (&si, true);
6064 slpeel_make_loop_iterate_ntimes (loop, ratio);
6066 mark_set_for_renaming (vect_memsyms_to_rename);
6068 /* The memory tags and pointers in vectorized statements need to
6069 have their SSA forms updated. FIXME, why can't this be delayed
6070 until all the loops have been transformed? */
6071 update_ssa (TODO_update_ssa);
6073 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
6074 fprintf (vect_dump, "LOOP VECTORIZED.");