1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *, slp_tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, struct loop*, tree, tree *, tree *, bool, tree, bool *);
53 static tree vect_create_addr_base_for_vector_ref
54 (tree, tree *, tree, struct loop *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree, tree, block_stmt_iterator *);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static int vect_min_worthwhile_factor (enum tree_code);
76 cost_for_stmt (tree stmt)
78 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
80 switch (STMT_VINFO_TYPE (stmt_info))
82 case load_vec_info_type:
83 return TARG_SCALAR_LOAD_COST;
84 case store_vec_info_type:
85 return TARG_SCALAR_STORE_COST;
86 case op_vec_info_type:
87 case condition_vec_info_type:
88 case assignment_vec_info_type:
89 case reduc_vec_info_type:
90 case induc_vec_info_type:
91 case type_promotion_vec_info_type:
92 case type_demotion_vec_info_type:
93 case type_conversion_vec_info_type:
94 case call_vec_info_type:
95 return TARG_SCALAR_STMT_COST;
96 case undef_vec_info_type:
103 /* Function vect_estimate_min_profitable_iters
105 Return the number of iterations required for the vector version of the
106 loop to be profitable relative to the cost of the scalar version of the
109 TODO: Take profile info into account before making vectorization
110 decisions, if available. */
113 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
116 int min_profitable_iters;
117 int peel_iters_prologue;
118 int peel_iters_epilogue;
119 int vec_inside_cost = 0;
120 int vec_outside_cost = 0;
121 int scalar_single_iter_cost = 0;
122 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
124 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
125 int nbbs = loop->num_nodes;
127 int peel_guard_costs = 0;
128 int innerloop_iters = 0, factor;
129 VEC (slp_instance, heap) *slp_instances;
130 slp_instance instance;
132 /* Cost model disabled. */
133 if (!flag_vect_cost_model)
135 if (vect_print_dump_info (REPORT_DETAILS))
136 fprintf (vect_dump, "cost model disabled.");
140 /* Requires loop versioning tests to handle misalignment.
141 FIXME: Make cost depend on number of stmts in may_misalign list. */
143 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
145 vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
146 if (vect_print_dump_info (REPORT_DETAILS))
147 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
151 /* Count statements in scalar loop. Using this as scalar cost for a single
154 TODO: Add outer loop support.
156 TODO: Consider assigning different costs to different scalar
161 innerloop_iters = 50; /* FIXME */
163 for (i = 0; i < nbbs; i++)
165 block_stmt_iterator si;
166 basic_block bb = bbs[i];
168 if (bb->loop_father == loop->inner)
169 factor = innerloop_iters;
173 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
175 tree stmt = bsi_stmt (si);
176 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
177 if (!STMT_VINFO_RELEVANT_P (stmt_info)
178 && !STMT_VINFO_LIVE_P (stmt_info))
180 scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
181 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
182 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
183 some of the "outside" costs are generated inside the outer-loop. */
184 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
188 /* Add additional cost for the peeled instructions in prologue and epilogue
191 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
192 at compile-time - we assume it's vf/2 (the worst would be vf-1).
194 TODO: Build an expression that represents peel_iters for prologue and
195 epilogue to be used in a run-time test. */
197 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
199 if (byte_misalign < 0)
201 peel_iters_prologue = vf/2;
202 if (vect_print_dump_info (REPORT_DETAILS))
203 fprintf (vect_dump, "cost model: "
204 "prologue peel iters set to vf/2.");
206 /* If peeling for alignment is unknown, loop bound of main loop becomes
208 peel_iters_epilogue = vf/2;
209 if (vect_print_dump_info (REPORT_DETAILS))
210 fprintf (vect_dump, "cost model: "
211 "epilogue peel iters set to vf/2 because "
212 "peeling for alignment is unknown .");
214 /* If peeled iterations are unknown, count a taken branch and a not taken
215 branch per peeled loop. Even if scalar loop iterations are known,
216 vector iterations are not known since peeled prologue iterations are
217 not known. Hence guards remain the same. */
218 peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
219 + TARG_COND_NOT_TAKEN_BRANCH_COST);
226 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
227 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
228 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
229 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
231 peel_iters_prologue = nelements - (byte_misalign / element_size);
234 peel_iters_prologue = 0;
236 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
238 peel_iters_epilogue = vf/2;
239 if (vect_print_dump_info (REPORT_DETAILS))
240 fprintf (vect_dump, "cost model: "
241 "epilogue peel iters set to vf/2 because "
242 "loop iterations are unknown .");
244 /* If peeled iterations are known but the number of scalar loop
245 iterations is unknown, count a taken branch per peeled loop. */
246 peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
251 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
252 peel_iters_prologue = niters < peel_iters_prologue ?
253 niters : peel_iters_prologue;
254 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
258 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
259 + (peel_iters_epilogue * scalar_single_iter_cost)
262 /* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
263 information we provide for the target is whether testing against the
264 threshold involves a runtime test. */
265 if (targetm.vectorize.builtin_vectorization_cost)
267 bool runtime_test = false;
269 /* If the number of iterations is unknown, or the
270 peeling-for-misalignment amount is unknown, we will have to generate
271 a runtime test to test the loop count against the threshold. */
272 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
273 || (byte_misalign < 0))
276 targetm.vectorize.builtin_vectorization_cost (runtime_test);
277 if (vect_print_dump_info (REPORT_DETAILS))
278 fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
279 targetm.vectorize.builtin_vectorization_cost (runtime_test));
283 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
284 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
286 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
287 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
290 /* Calculate number of iterations required to make the vector version
291 profitable, relative to the loop bodies only. The following condition
292 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
293 SIC = scalar iteration cost, VIC = vector iteration cost,
294 VOC = vector outside cost and VF = vectorization factor. */
296 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
298 if (vec_outside_cost <= 0)
299 min_profitable_iters = 1;
302 min_profitable_iters = (vec_outside_cost * vf
303 - vec_inside_cost * peel_iters_prologue
304 - vec_inside_cost * peel_iters_epilogue)
305 / ((scalar_single_iter_cost * vf)
308 if ((scalar_single_iter_cost * vf * min_profitable_iters)
309 <= ((vec_inside_cost * min_profitable_iters)
310 + (vec_outside_cost * vf)))
311 min_profitable_iters++;
314 /* vector version will never be profitable. */
317 if (vect_print_dump_info (REPORT_DETAILS))
318 fprintf (vect_dump, "cost model: vector iteration cost = %d "
319 "is divisible by scalar iteration cost = %d by a factor "
320 "greater than or equal to the vectorization factor = %d .",
321 vec_inside_cost, scalar_single_iter_cost, vf);
325 if (vect_print_dump_info (REPORT_DETAILS))
327 fprintf (vect_dump, "Cost model analysis: \n");
328 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
330 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
332 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
333 fprintf (vect_dump, " prologue iterations: %d\n",
334 peel_iters_prologue);
335 fprintf (vect_dump, " epilogue iterations: %d\n",
336 peel_iters_epilogue);
337 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
338 min_profitable_iters);
341 min_profitable_iters =
342 min_profitable_iters < vf ? vf : min_profitable_iters;
344 /* Because the condition we create is:
345 if (niters <= min_profitable_iters)
346 then skip the vectorized loop. */
347 min_profitable_iters--;
349 if (vect_print_dump_info (REPORT_DETAILS))
350 fprintf (vect_dump, " Profitability threshold = %d\n",
351 min_profitable_iters);
353 return min_profitable_iters;
357 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
358 functions. Design better to avoid maintenance issues. */
360 /* Function vect_model_reduction_cost.
362 Models cost for a reduction operation, including the vector ops
363 generated within the strip-mine loop, the initial definition before
364 the loop, and the epilogue code that must be generated. */
367 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
376 enum machine_mode mode;
377 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
378 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
379 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
380 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
382 /* Cost of reduction op inside loop. */
383 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
385 reduction_op = TREE_OPERAND (operation, op_type-1);
386 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
387 mode = TYPE_MODE (vectype);
388 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
391 orig_stmt = STMT_VINFO_STMT (stmt_info);
393 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
395 /* Add in cost for initial definition. */
396 outer_cost += TARG_SCALAR_TO_VEC_COST;
398 /* Determine cost of epilogue code.
400 We have a reduction operator that will reduce the vector in one statement.
401 Also requires scalar extract. */
403 if (!nested_in_vect_loop_p (loop, orig_stmt))
405 if (reduc_code < NUM_TREE_CODES)
406 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
409 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
411 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
412 int element_bitsize = tree_low_cst (bitsize, 1);
413 int nelements = vec_size_in_bits / element_bitsize;
415 optab = optab_for_tree_code (code, vectype);
417 /* We have a whole vector shift available. */
418 if (VECTOR_MODE_P (mode)
419 && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
420 && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
421 /* Final reduction via vector shifts and the reduction operator. Also
422 requires scalar extract. */
423 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
424 + TARG_VEC_TO_SCALAR_COST);
426 /* Use extracts and reduction op for final reduction. For N elements,
427 we have N extracts and N-1 reduction ops. */
428 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
432 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
434 if (vect_print_dump_info (REPORT_DETAILS))
435 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
436 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
437 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
441 /* Function vect_model_induction_cost.
443 Models cost for induction operations. */
446 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
448 /* loop cost for vec_loop. */
449 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
450 /* prologue cost for vec_init and vec_step. */
451 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
453 if (vect_print_dump_info (REPORT_DETAILS))
454 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
455 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
456 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
460 /* Function vect_model_simple_cost.
462 Models cost for simple operations, i.e. those that only emit ncopies of a
463 single op. Right now, this does not account for multiple insns that could
464 be generated for the single vector op. We will handle that shortly. */
467 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
468 enum vect_def_type *dt, slp_tree slp_node)
471 int inside_cost = 0, outside_cost = 0;
473 inside_cost = ncopies * TARG_VEC_STMT_COST;
475 /* FORNOW: Assuming a maximum of 2 args per stmt. */
476 for (i = 0; i < 2; i++)
478 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
479 outside_cost += TARG_SCALAR_TO_VEC_COST;
482 if (vect_print_dump_info (REPORT_DETAILS))
483 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
484 "outside_cost = %d .", inside_cost, outside_cost);
486 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
487 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
488 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
492 /* Function vect_cost_strided_group_size
494 For strided load or store, return the group_size only if it is the first
495 load or store of a group, else return 1. This ensures that group size is
496 only returned once per group. */
499 vect_cost_strided_group_size (stmt_vec_info stmt_info)
501 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
503 if (first_stmt == STMT_VINFO_STMT (stmt_info))
504 return DR_GROUP_SIZE (stmt_info);
510 /* Function vect_model_store_cost
512 Models cost for stores. In the case of strided accesses, one access
513 has the overhead of the strided access attributed to it. */
516 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
517 enum vect_def_type dt, slp_tree slp_node)
520 int inside_cost = 0, outside_cost = 0;
522 if (dt == vect_constant_def || dt == vect_invariant_def)
523 outside_cost = TARG_SCALAR_TO_VEC_COST;
525 /* Strided access? */
526 if (DR_GROUP_FIRST_DR (stmt_info))
527 group_size = vect_cost_strided_group_size (stmt_info);
528 /* Not a strided access. */
532 /* Is this an access in a group of stores, which provide strided access?
533 If so, add in the cost of the permutes. */
536 /* Uses a high and low interleave operation for each needed permute. */
537 inside_cost = ncopies * exact_log2(group_size) * group_size
538 * TARG_VEC_STMT_COST;
540 if (vect_print_dump_info (REPORT_DETAILS))
541 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
546 /* Costs of the stores. */
547 inside_cost += ncopies * TARG_VEC_STORE_COST;
549 if (vect_print_dump_info (REPORT_DETAILS))
550 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
551 "outside_cost = %d .", inside_cost, outside_cost);
553 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
554 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
555 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
559 /* Function vect_model_load_cost
561 Models cost for loads. In the case of strided accesses, the last access
562 has the overhead of the strided access attributed to it. Since unaligned
563 accesses are supported for loads, we also account for the costs of the
564 access scheme chosen. */
567 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
571 int alignment_support_cheme;
573 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
574 int inside_cost = 0, outside_cost = 0;
576 /* Strided accesses? */
577 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
578 if (first_stmt && !slp_node)
580 group_size = vect_cost_strided_group_size (stmt_info);
581 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
583 /* Not a strided access. */
590 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
592 /* Is this an access in a group of loads providing strided access?
593 If so, add in the cost of the permutes. */
596 /* Uses even and odd extract operations for each needed permute. */
597 inside_cost = ncopies * exact_log2(group_size) * group_size
598 * TARG_VEC_STMT_COST;
600 if (vect_print_dump_info (REPORT_DETAILS))
601 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
606 /* The loads themselves. */
607 switch (alignment_support_cheme)
611 inside_cost += ncopies * TARG_VEC_LOAD_COST;
613 if (vect_print_dump_info (REPORT_DETAILS))
614 fprintf (vect_dump, "vect_model_load_cost: aligned.");
618 case dr_unaligned_supported:
620 /* Here, we assign an additional cost for the unaligned load. */
621 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
623 if (vect_print_dump_info (REPORT_DETAILS))
624 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
629 case dr_explicit_realign:
631 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
633 /* FIXME: If the misalignment remains fixed across the iterations of
634 the containing loop, the following cost should be added to the
636 if (targetm.vectorize.builtin_mask_for_load)
637 inside_cost += TARG_VEC_STMT_COST;
641 case dr_explicit_realign_optimized:
643 if (vect_print_dump_info (REPORT_DETAILS))
644 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
647 /* Unaligned software pipeline has a load of an address, an initial
648 load, and possibly a mask operation to "prime" the loop. However,
649 if this is an access in a group of loads, which provide strided
650 access, then the above cost should only be considered for one
651 access in the group. Inside the loop, there is a load op
652 and a realignment op. */
654 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
656 outside_cost = 2*TARG_VEC_STMT_COST;
657 if (targetm.vectorize.builtin_mask_for_load)
658 outside_cost += TARG_VEC_STMT_COST;
661 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
670 if (vect_print_dump_info (REPORT_DETAILS))
671 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
672 "outside_cost = %d .", inside_cost, outside_cost);
674 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
675 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
676 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
680 /* Function vect_get_new_vect_var.
682 Creates a new temporary variable of type TYPE. The current naming scheme
683 uses a prefix such as "vect_" or "vect_p" (depending on the value of VAR_KIND)
684 for vectorizer generated variables, and prepends that prefix to NAME if
688 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
695 case vect_simple_var:
698 case vect_scalar_var:
701 case vect_pointer_var:
710 char* tmp = concat (prefix, name, NULL);
711 new_vect_var = create_tmp_var (type, tmp);
715 new_vect_var = create_tmp_var (type, prefix);
717 /* Mark vector typed variable as a gimple register variable. */
718 if (TREE_CODE (type) == VECTOR_TYPE)
719 DECL_GIMPLE_REG_P (new_vect_var) = true;
725 /* Function vect_create_addr_base_for_vector_ref.
727 Create an expression that computes the address of the first memory location
728 that will be accessed for a data reference.
731 STMT: The statement containing the data reference.
732 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
733 OFFSET: Optional. If supplied, it is added to the initial address.
734 LOOP: Specifies the loop-nest relative to which the address is computed.
735 For example, when the dataref is in an inner-loop nested in an
736 outer-loop that is now being vectorized, LOOP can be either the
737 outer-loop, or the inner-loop. The first memory location accessed
738 by the following dataref ('in' points to short):
745 if LOOP=i_loop: &in (relative to i_loop)
746 if LOOP=j_loop: &in+i*2B (relative to j_loop)
749 1. Return an SSA_NAME whose value is the address of the memory location of
750 the first vector of the data reference.
751 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
752 these statement(s) which define the returned SSA_NAME.
754 FORNOW: We are only handling array accesses with step 1. */
757 vect_create_addr_base_for_vector_ref (tree stmt,
762 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
763 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
764 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
765 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
767 tree data_ref_base_var;
770 tree addr_base, addr_expr;
772 tree base_offset = unshare_expr (DR_OFFSET (dr));
773 tree init = unshare_expr (DR_INIT (dr));
774 tree vect_ptr_type, addr_expr2;
775 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
778 if (loop != containing_loop)
780 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
781 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
783 gcc_assert (nested_in_vect_loop_p (loop, stmt));
785 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
786 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
787 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
790 /* Create data_ref_base */
791 base_name = build_fold_indirect_ref (data_ref_base);
792 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
793 add_referenced_var (data_ref_base_var);
794 data_ref_base = force_gimple_operand (data_ref_base, &new_base_stmt,
795 true, data_ref_base_var);
796 append_to_statement_list_force(new_base_stmt, new_stmt_list);
798 /* Create base_offset */
799 base_offset = size_binop (PLUS_EXPR, base_offset, init);
800 base_offset = fold_convert (sizetype, base_offset);
801 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
802 add_referenced_var (dest);
803 base_offset = force_gimple_operand (base_offset, &new_stmt, true, dest);
804 append_to_statement_list_force (new_stmt, new_stmt_list);
808 tree tmp = create_tmp_var (sizetype, "offset");
810 add_referenced_var (tmp);
811 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
812 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
813 base_offset, offset);
814 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
815 append_to_statement_list_force (new_stmt, new_stmt_list);
818 /* base + base_offset */
819 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
820 data_ref_base, base_offset);
822 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
824 /* addr_expr = addr_base */
825 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
826 get_name (base_name));
827 add_referenced_var (addr_expr);
828 vec_stmt = fold_convert (vect_ptr_type, addr_base);
829 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
830 get_name (base_name));
831 add_referenced_var (addr_expr2);
832 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
833 append_to_statement_list_force (new_stmt, new_stmt_list);
835 if (vect_print_dump_info (REPORT_DETAILS))
837 fprintf (vect_dump, "created ");
838 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
844 /* Function vect_create_data_ref_ptr.
846 Create a new pointer to vector type (vp), that points to the first location
847 accessed in the loop by STMT, along with the def-use update chain to
848 appropriately advance the pointer through the loop iterations. Also set
849 aliasing information for the pointer. This vector pointer is used by the
850 callers to this function to create a memory reference expression for vector
854 1. STMT: a stmt that references memory. Expected to be of the form
855 GIMPLE_MODIFY_STMT <name, data-ref> or
856 GIMPLE_MODIFY_STMT <data-ref, name>.
857 2. AT_LOOP: the loop where the vector memref is to be created.
858 3. OFFSET (optional): an offset to be added to the initial address accessed
859 by the data-ref in STMT.
860 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
861 pointing to the initial address.
862 5. TYPE: if not NULL indicates the required type of the data-ref
865 1. Declare a new ptr to vector_type, and have it point to the base of the
866 data reference (initial address accessed by the data reference).
867 For example, for vector of type V8HI, the following code is generated:
870 vp = (v8hi *)initial_address;
872 if OFFSET is not supplied:
873 initial_address = &a[init];
874 if OFFSET is supplied:
875 initial_address = &a[init + OFFSET];
877 Return the initial_address in INITIAL_ADDRESS.
879 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
880 update the pointer in each iteration of the loop.
882 Return the increment stmt that updates the pointer in PTR_INCR.
884 3. Set INV_P to true if the access pattern of the data reference in the
885 vectorized loop is invariant. Set it to false otherwise.
887 4. Return the pointer. */
890 vect_create_data_ref_ptr (tree stmt, struct loop *at_loop,
891 tree offset, tree *initial_address, tree *ptr_incr,
892 bool only_init, tree type, bool *inv_p)
895 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
897 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
898 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
899 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
900 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
906 tree new_stmt_list = NULL_TREE;
910 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
912 block_stmt_iterator incr_bsi;
914 tree indx_before_incr, indx_after_incr;
918 /* Check the step (evolution) of the load in LOOP, and record
919 whether it's invariant. */
920 if (nested_in_vect_loop)
921 step = STMT_VINFO_DR_STEP (stmt_info);
923 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
925 if (tree_int_cst_compare (step, size_zero_node) == 0)
930 /* Create an expression for the first address accessed by this load
932 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
934 if (vect_print_dump_info (REPORT_DETAILS))
936 tree data_ref_base = base_name;
937 fprintf (vect_dump, "create vector-pointer variable to type: ");
938 print_generic_expr (vect_dump, vectype, TDF_SLIM);
939 if (TREE_CODE (data_ref_base) == VAR_DECL)
940 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
941 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
942 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
943 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
944 fprintf (vect_dump, " vectorizing a record based array ref: ");
945 else if (TREE_CODE (data_ref_base) == SSA_NAME)
946 fprintf (vect_dump, " vectorizing a pointer ref: ");
947 print_generic_expr (vect_dump, base_name, TDF_SLIM);
950 /** (1) Create the new vector-pointer variable: **/
952 vect_ptr_type = build_pointer_type (type);
954 vect_ptr_type = build_pointer_type (vectype);
955 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
956 get_name (base_name));
957 add_referenced_var (vect_ptr);
959 /** (2) Add aliasing information to the new vector-pointer:
960 (The points-to info (DR_PTR_INFO) may be defined later.) **/
962 tag = DR_SYMBOL_TAG (dr);
965 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
966 tag must be created with tag added to its may alias list. */
968 new_type_alias (vect_ptr, tag, DR_REF (dr));
970 set_symbol_mem_tag (vect_ptr, tag);
972 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
974 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
975 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
976 def-use update cycles for the pointer: One relative to the outer-loop
977 (LOOP), which is what steps (3) and (4) below do. The other is relative
978 to the inner-loop (which is the inner-most loop containing the dataref),
979 and this is done by step (5) below.
981 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
982 inner-most loop, and so steps (3),(4) work the same, and step (5) is
983 redundant. Steps (3),(4) create the following:
986 LOOP: vp1 = phi(vp0,vp2)
992 If there is an inner-loop nested in loop, then step (5) will also be
993 applied, and an additional update in the inner-loop will be created:
996 LOOP: vp1 = phi(vp0,vp2)
998 inner: vp3 = phi(vp1,vp4)
999 vp4 = vp3 + inner_step
1005 /** (3) Calculate the initial address of the vector-pointer, and set
1006 the vector-pointer to point to it before the loop: **/
1008 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1010 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1012 pe = loop_preheader_edge (loop);
1013 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1014 gcc_assert (!new_bb);
1015 *initial_address = new_temp;
1017 /* Create: p = (vectype *) initial_base */
1018 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1019 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
1020 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
1021 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
1022 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1023 gcc_assert (!new_bb);
1026 /** (4) Handle the updating of the vector-pointer inside the loop.
1027 This is needed when ONLY_INIT is false, and also when AT_LOOP
1028 is the inner-loop nested in LOOP (during outer-loop vectorization).
1031 if (only_init && at_loop == loop) /* No update in loop is required. */
1033 /* Copy the points-to information if it exists. */
1034 if (DR_PTR_INFO (dr))
1035 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
1036 vptr = vect_ptr_init;
1040 /* The step of the vector pointer is the Vector Size. */
1041 tree step = TYPE_SIZE_UNIT (vectype);
1042 /* One exception to the above is when the scalar step of the load in
1043 LOOP is zero. In this case the step here is also zero. */
1045 step = size_zero_node;
1047 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1049 create_iv (vect_ptr_init,
1050 fold_convert (vect_ptr_type, step),
1051 NULL_TREE, loop, &incr_bsi, insert_after,
1052 &indx_before_incr, &indx_after_incr);
1053 incr = bsi_stmt (incr_bsi);
1054 set_stmt_info (stmt_ann (incr),
1055 new_stmt_vec_info (incr, loop_vinfo));
1057 /* Copy the points-to information if it exists. */
1058 if (DR_PTR_INFO (dr))
1060 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1061 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1063 merge_alias_info (vect_ptr_init, indx_before_incr);
1064 merge_alias_info (vect_ptr_init, indx_after_incr);
1068 vptr = indx_before_incr;
1071 if (!nested_in_vect_loop || only_init)
1075 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1076 nested in LOOP, if exists: **/
1078 gcc_assert (nested_in_vect_loop);
1081 standard_iv_increment_position (containing_loop, &incr_bsi,
1083 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
1084 containing_loop, &incr_bsi, insert_after, &indx_before_incr,
1086 incr = bsi_stmt (incr_bsi);
1087 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1089 /* Copy the points-to information if it exists. */
1090 if (DR_PTR_INFO (dr))
1092 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1093 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1095 merge_alias_info (vect_ptr_init, indx_before_incr);
1096 merge_alias_info (vect_ptr_init, indx_after_incr);
1100 return indx_before_incr;
1107 /* Function bump_vector_ptr
1109 Increment a pointer (to a vector type) by vector-size. If requested,
1110 i.e. if PTR_INCR is given, then also connect the new increment stmt
1111 to the existing def-use update-chain of the pointer, by modifying
1112 the PTR_INCR as illustrated below:
1114 The pointer def-use update-chain before this function:
1115 DATAREF_PTR = phi (p_0, p_2)
1117 PTR_INCR: p_2 = DATAREF_PTR + step
1119 The pointer def-use update-chain after this function:
1120 DATAREF_PTR = phi (p_0, p_2)
1122 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1124 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1127 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
1129 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1130 the loop. The increment amount across iterations is expected
1132 BSI - location where the new update stmt is to be placed.
1133 STMT - the original scalar memory-access stmt that is being vectorized.
1134 BUMP - optional. The offset by which to bump the pointer. If not given,
1135 the offset is assumed to be vector_size.
1137 Output: Return NEW_DATAREF_PTR as illustrated above.
1142 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
1143 tree stmt, tree bump)
1145 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1146 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1147 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1148 tree vptr_type = TREE_TYPE (dataref_ptr);
1149 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
1150 tree update = TYPE_SIZE_UNIT (vectype);
1153 use_operand_p use_p;
1154 tree new_dataref_ptr;
1159 incr_stmt = build_gimple_modify_stmt (ptr_var,
1160 build2 (POINTER_PLUS_EXPR, vptr_type,
1161 dataref_ptr, update));
1162 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1163 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
1164 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
1166 /* Copy the points-to information if it exists. */
1167 if (DR_PTR_INFO (dr))
1168 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1169 merge_alias_info (new_dataref_ptr, dataref_ptr);
     /* NOTE(review): presumably the early return taken when no PTR_INCR was
        given; the guarding condition is not visible in this listing. */
1172 return new_dataref_ptr;
1174 /* Update the vector-pointer's cross-iteration increment. */
1175 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1177 tree use = USE_FROM_PTR (use_p);
     /* Redirect PTR_INCR's use of the old pointer to the bumped pointer. */
1179 if (use == dataref_ptr)
1180 SET_USE (use_p, new_dataref_ptr);
     /* A use operand is asserted to equal UPDATE, the bump amount. */
1182 gcc_assert (tree_int_cst_compare (use, update) == 0);
1185 return new_dataref_ptr;
1189 /* Function vect_create_destination_var.
1191 Create a new temporary of type VECTYPE.
     SCALAR_DEST must be an SSA_NAME (asserted below); its name is handed to
     vect_get_new_vect_var for naming the new variable. If VECTYPE is null,
     the temporary is instead created as a scalar variable with the type of
     SCALAR_DEST. The new variable is registered with add_referenced_var. */
1194 vect_create_destination_var (tree scalar_dest, tree vectype)
1197 const char *new_name;
1199 enum vect_var_kind kind;
     /* Without a VECTYPE, fall back to a scalar variable of SCALAR_DEST's
        own type. */
1201 kind = vectype ? vect_simple_var : vect_scalar_var;
1202 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1204 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1206 new_name = get_name (scalar_dest);
1209 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1210 add_referenced_var (vec_dest);
1216 /* Function vect_init_vector.
1218 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1219 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1220 is not NULL. Otherwise, place the initialization at the loop preheader.
1221 Return the DEF of INIT_STMT.
1222 It will be used in the vectorization of STMT. */
1225 vect_init_vector (tree stmt, tree vector_var, tree vector_type,
1226 block_stmt_iterator *bsi)
1228 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
     /* Build 'new_var = VECTOR_VAR' and give its lhs a fresh SSA name. */
1236 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1237 add_referenced_var (new_var);
1238 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1239 new_temp = make_ssa_name (new_var, init_stmt);
1240 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
1243 vect_finish_stmt_generation (stmt, init_stmt, bsi);
1246 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1247 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1249 if (nested_in_vect_loop_p (loop, stmt))
     /* Insert on the preheader edge; the insertion is asserted not to
        create a new basic block. */
1251 pe = loop_preheader_edge (loop);
1252 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1253 gcc_assert (!new_bb);
1256 if (vect_print_dump_info (REPORT_DETAILS))
1258 fprintf (vect_dump, "created new init_stmt: ");
1259 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
     /* The def of INIT_STMT is its operand 0 (the SSA name set above). */
1262 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1267 /* For constant and loop invariant defs of SLP_NODE this function returns
1268 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1269 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1273 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1274 unsigned int op_num)
1276 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1277 tree stmt = VEC_index (tree, stmts, 0);
1278 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1279 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1280 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1283 int j, number_of_places_left_in_vector;
1285 tree op, vop, operation;
1286 int group_size = VEC_length (tree, stmts);
1287 unsigned int vec_num, i;
1288 int number_of_copies = 1;
1289 bool is_store = false;
1290 unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1291 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1293 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1296 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1297 created vectors. It is greater than 1 if unrolling is performed.
1299 For example, we have two scalar operands, s1 and s2 (e.g., group of
1300 strided accesses of size two), while NUNITS is four (i.e., four scalars
1301 of this type can be packed in a vector). The output vector will contain
1302 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1305 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1306 containing the operands.
1308 For example, NUNITS is four as before, and the group size is 8
1309 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1310 {s5, s6, s7, s8}. */
1312 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1314 number_of_places_left_in_vector = nunits;
1315 for (j = 0; j < number_of_copies; j++)
     /* Walk the scalar stmts from last to first. I is unsigned, so
        decrementing it past zero wraps around; the loop relies on
        VEC_iterate rejecting that index to terminate. */
1317 for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--)
1319 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1323 op = TREE_OPERAND (operation, op_num);
1325 /* Create 'vect_ = {op0,op1,...,opn}'. */
1326 t = tree_cons (NULL_TREE, op, t);
1328 number_of_places_left_in_vector--;
1330 if (number_of_places_left_in_vector == 0)
     /* The current vector is full: reset the counter and emit the vector
        built from the list T. */
1332 number_of_places_left_in_vector = nunits;
1334 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1335 vec_cst = build_constructor_from_list (vector_type, t);
1336 VEC_quick_push (tree, voprnds,
1337 vect_init_vector (stmt, vec_cst, vector_type,
1344 /* Since the vectors are created in the reverse order, we should invert
1346 vec_num = VEC_length (tree, voprnds);
1347 for (j = vec_num - 1; j >= 0; j--)
1349 vop = VEC_index (tree, voprnds, j);
1350 VEC_quick_push (tree, *vec_oprnds, vop);
1353 VEC_free (tree, heap, voprnds);
1355 /* In case that VF is greater than the unrolling factor needed for the SLP
1356 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1357 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1358 to replicate the vectors. */
1359 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
     /* Re-push only the first VEC_NUM entries (the distinct vectors); the
        'i < vec_num' bound keeps the loop from also iterating over the
        elements it is appending. */
1361 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1362 VEC_quick_push (tree, *vec_oprnds, vop);
1367 /* Get vectorized definitions from SLP_NODE that contains corresponding
1368 vectorized def-stmts.
     For every stmt in SLP_TREE_VEC_STMTS (SLP_NODE), its operand 0 (the def)
     is pushed onto *VEC_OPRNDS with VEC_quick_push; the caller in this file
     (vect_get_slp_defs) allocates *VEC_OPRNDS before calling. */
1371 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1377 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1380 VEC_iterate (tree, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1383 gcc_assert (vec_def_stmt);
1384 vec_oprnd = GIMPLE_STMT_OPERAND (vec_def_stmt, 0);
1385 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1390 /* Get vectorized definitions for SLP_NODE.
1391 If the scalar definitions are loop invariants or constants, collect them and
1392 call vect_get_constant_vectors() to create vector stmts.
1393 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1394 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1395 vect_get_slp_vect_defs() to retrieve them.
1396 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1397 the right node). This is used when the second operand must remain scalar. */
1400 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1401 VEC (tree,heap) **vec_oprnds1)
1403 tree operation, first_stmt;
1405 /* Allocate memory for vectorized defs. */
1406 *vec_oprnds0 = VEC_alloc (tree, heap,
1407 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1409 /* SLP_NODE corresponds either to a group of stores or to a group of
1410 unary/binary operations. We don't call this function for loads. */
1411 if (SLP_TREE_LEFT (slp_node))
1412 /* The defs are already vectorized. */
1413 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1415 /* Build vectors from scalar defs. */
1416 vect_get_constant_vectors (slp_node, vec_oprnds0, 0);
     /* The first scalar stmt of the node is inspected to decide whether a
        second operand has to be handled at all. */
1418 first_stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1419 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1420 /* Since we don't call this function with loads, this is a group of
1424 operation = GIMPLE_STMT_OPERAND (first_stmt, 1);
1425 if (TREE_OPERAND_LENGTH (operation) == unary_op || !vec_oprnds1)
1428 *vec_oprnds1 = VEC_alloc (tree, heap,
1429 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1431 if (SLP_TREE_RIGHT (slp_node))
1432 /* The defs are already vectorized. */
1433 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1435 /* Build vectors from scalar defs. */
1436 vect_get_constant_vectors (slp_node, vec_oprnds1, 1);
1440 /* Function get_initial_def_for_induction
1443 STMT - a stmt that performs an induction operation in the loop.
1444 IV_PHI - the initial value of the induction variable
     NOTE(review): the signature visible here takes only IV_PHI, which the
     body treats as the phi node defining the induction variable; STMT above
     does not correspond to a parameter.
1447 Return a vector variable, initialized with the first VF values of
1448 the induction variable. E.g., for an iv with IV_PHI='X' and
1449 evolution S, for a vector of 4 units, we want to return:
1450 [X, X + S, X + 2*S, X + 3*S]. */
1453 get_initial_def_for_induction (tree iv_phi)
1455 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1456 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1457 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1458 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1459 tree vectype = get_vectype_for_scalar_type (scalar_type);
1460 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1461 edge pe = loop_preheader_edge (loop);
1462 struct loop *iv_loop;
1464 tree vec, vec_init, vec_step, t;
1469 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1470 tree init_expr, step_expr;
1471 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1474 int ncopies = vf / nunits;
1476 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1477 bool nested_in_vect_loop = false;
1479 imm_use_iterator imm_iter;
1480 use_operand_p use_p;
1484 block_stmt_iterator si;
1485 basic_block bb = bb_for_stmt (iv_phi);
1487 gcc_assert (phi_info);
1488 gcc_assert (ncopies >= 1);
1490 /* Find the first insertion point in the BB. */
1491 si = bsi_after_labels (bb);
     /* Start with a zero step of the scalar type; STEP_EXPR is passed by
        address to vect_is_simple_iv_evolution further down. */
1493 if (INTEGRAL_TYPE_P (scalar_type))
1494 step_expr = build_int_cst (scalar_type, 0);
1496 step_expr = build_real (scalar_type, dconst0);
1498 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1499 if (nested_in_vect_loop_p (loop, iv_phi))
1501 nested_in_vect_loop = true;
1502 iv_loop = loop->inner;
1506 gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father);
1508 latch_e = loop_latch_edge (iv_loop);
1509 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
1511 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
1512 gcc_assert (access_fn);
1513 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
1514 &init_expr, &step_expr);
     /* From here on PE is the preheader edge of IV_LOOP, which may be the
        inner loop rather than the loop being vectorized. */
1516 pe = loop_preheader_edge (iv_loop);
1518 /* Create the vector that holds the initial_value of the induction. */
1519 if (nested_in_vect_loop)
1521 /* iv_loop is nested in the loop to be vectorized. init_expr had already
1522 been created during vectorization of previous stmts; We obtain it from
1523 the STMT_VINFO_VEC_STMT of the defining stmt. */
1524 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
1525 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
1529 /* iv_loop is the loop to be vectorized. Create:
1530 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1531 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1532 add_referenced_var (new_var);
1534 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1537 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1538 gcc_assert (!new_bb);
1542 t = tree_cons (NULL_TREE, init_expr, t);
1543 for (i = 1; i < nunits; i++)
1547 /* Create: new_name_i = new_name + step_expr */
1548 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1549 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1550 new_name = make_ssa_name (new_var, init_stmt);
1551 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1553 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1554 gcc_assert (!new_bb);
1556 if (vect_print_dump_info (REPORT_DETAILS))
1558 fprintf (vect_dump, "created new init_stmt: ");
1559 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1561 t = tree_cons (NULL_TREE, new_name, t);
1563 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
     /* The list was built by prepending, hence the nreverse. */
1564 vec = build_constructor_from_list (vectype, nreverse (t));
1565 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
1569 /* Create the vector that holds the step of the induction. */
1570 if (nested_in_vect_loop)
1571 /* iv_loop is nested in the loop to be vectorized. Generate:
1572 vec_step = [S, S, S, S] */
1573 new_name = step_expr;
1576 /* iv_loop is the loop to be vectorized. Generate:
1577 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1578 expr = build_int_cst (scalar_type, vf);
1579 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1583 for (i = 0; i < nunits; i++)
1584 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1585 vec = build_constructor_from_list (vectype, t);
1586 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1589 /* Create the following def-use cycle:
1594 vec_iv = PHI <vec_init, vec_loop>
1598 vec_loop = vec_iv + vec_step; */
1600 /* Create the induction-phi that defines the induction-operand. */
1601 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1602 add_referenced_var (vec_dest);
1603 induction_phi = create_phi_node (vec_dest, iv_loop->header);
1604 set_stmt_info (get_stmt_ann (induction_phi),
1605 new_stmt_vec_info (induction_phi, loop_vinfo));
1606 induc_def = PHI_RESULT (induction_phi);
1608 /* Create the iv update inside the loop */
1609 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1610 build2 (PLUS_EXPR, vectype,
1611 induc_def, vec_step));
1612 vec_def = make_ssa_name (vec_dest, new_stmt);
1613 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1614 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1615 set_stmt_info (get_stmt_ann (new_stmt),
1616 new_stmt_vec_info (new_stmt, loop_vinfo));
1618 /* Set the arguments of the phi node: */
1619 add_phi_arg (induction_phi, vec_init, pe);
1620 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
1623 /* In case that vectorization factor (VF) is bigger than the number
1624 of elements that we can fit in a vectype (nunits), we have to generate
1625 more than one vector stmt - i.e - we need to "unroll" the
1626 vector stmt by a factor VF/nunits. For more details see documentation
1627 in vectorizable_operation. */
1631 stmt_vec_info prev_stmt_vinfo;
1632 /* FORNOW. This restriction should be relaxed. */
1633 gcc_assert (!nested_in_vect_loop);
1635 /* Create the vector that holds the step of the induction. */
     /* Between consecutive copies the step is NUNITS*S, not VF*S. */
1636 expr = build_int_cst (scalar_type, nunits);
1637 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1639 for (i = 0; i < nunits; i++)
1640 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1641 vec = build_constructor_from_list (vectype, t);
1642 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1644 vec_def = induc_def;
1645 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1646 for (i = 1; i < ncopies; i++)
1650 /* vec_i = vec_prev + vec_step */
1651 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1652 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1653 vec_def = make_ssa_name (vec_dest, new_stmt);
1654 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1655 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1656 set_stmt_info (get_stmt_ann (new_stmt),
1657 new_stmt_vec_info (new_stmt, loop_vinfo));
     /* Chain the copies through STMT_VINFO_RELATED_STMT, starting from the
        induction phi. */
1658 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1659 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1663 if (nested_in_vect_loop)
1665 /* Find the loop-closed exit-phi of the induction, and record
1666 the final vector of induction results: */
1668 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
1670 if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p))))
1672 exit_phi = USE_STMT (use_p);
1678 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
1679 /* FORNOW. Currently not supporting the case that an inner-loop induction
1680 is not used in the outer-loop (i.e. only outside the outer-loop). */
1681 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
1682 && !STMT_VINFO_LIVE_P (stmt_vinfo));
     /* NEW_STMT is the last vector stmt created above. */
1684 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
1685 if (vect_print_dump_info (REPORT_DETAILS))
1687 fprintf (vect_dump, "vector of inductions after inner-loop:");
1688 print_generic_expr (vect_dump, new_stmt, TDF_SLIM);
1694 if (vect_print_dump_info (REPORT_DETAILS))
1696 fprintf (vect_dump, "transform induction: created def-use cycle:");
1697 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1698 fprintf (vect_dump, "\n");
1699 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
     /* Record the vector phi as the vectorized form of IV_PHI. */
1702 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1707 /* Function vect_get_vec_def_for_operand.
1709 OP is an operand in STMT. This function returns a (vector) def that will be
1710 used in the vectorized stmt for STMT.
1712 In the case that OP is an SSA_NAME which is defined in the loop, then
1713 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1715 In case OP is an invariant or constant, a new stmt that creates a vector def
1716 needs to be introduced.
     SCALAR_DEF - in case 3 below it receives the scalar defining stmt; in
     the reduction case it is forwarded to get_initial_def_for_reduction.
     NOTE(review): callers in this file also pass NULL, so the store is
     presumably guarded by a check not visible in this listing. */
1719 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1724 stmt_vec_info def_stmt_info = NULL;
1725 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1726 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1727 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1728 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1734 enum vect_def_type dt;
1738 if (vect_print_dump_info (REPORT_DETAILS))
1740 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1741 print_generic_expr (vect_dump, op, TDF_SLIM);
     /* Classify OP; the def type DT selects one of the cases below. The use
        is asserted to be a simple one. */
1744 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1745 gcc_assert (is_simple_use);
1746 if (vect_print_dump_info (REPORT_DETAILS))
1750 fprintf (vect_dump, "def = ");
1751 print_generic_expr (vect_dump, def, TDF_SLIM);
1755 fprintf (vect_dump, " def_stmt = ");
1756 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1762 /* Case 1: operand is a constant. */
1763 case vect_constant_def:
1768 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1769 if (vect_print_dump_info (REPORT_DETAILS))
1770 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1772 for (i = nunits - 1; i >= 0; --i)
1774 t = tree_cons (NULL_TREE, op, t);
1776 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1777 vec_cst = build_vector (vector_type, t);
1779 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1782 /* Case 2: operand is defined outside the loop - loop invariant. */
1783 case vect_invariant_def:
1788 /* Create 'vec_inv = {inv,inv,..,inv}' */
1789 if (vect_print_dump_info (REPORT_DETAILS))
1790 fprintf (vect_dump, "Create vector_inv.");
1792 for (i = nunits - 1; i >= 0; --i)
1794 t = tree_cons (NULL_TREE, def, t);
1797 /* FIXME: use build_constructor directly. */
1798 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1799 vec_inv = build_constructor_from_list (vector_type, t);
1800 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1803 /* Case 3: operand is defined inside the loop. */
1807 *scalar_def = def_stmt;
1809 /* Get the def from the vectorized stmt. */
1810 def_stmt_info = vinfo_for_stmt (def_stmt);
1811 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1812 gcc_assert (vec_stmt);
     /* A vectorized phi carries its def as the phi result; any other stmt
        carries it as operand 0. */
1813 if (TREE_CODE (vec_stmt) == PHI_NODE)
1814 vec_oprnd = PHI_RESULT (vec_stmt);
1816 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1820 /* Case 4: operand is defined by a loop header phi - reduction */
1821 case vect_reduction_def:
1825 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1826 loop = (bb_for_stmt (def_stmt))->loop_father;
1828 /* Get the def before the loop */
1829 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1830 return get_initial_def_for_reduction (stmt, op, scalar_def);
1833 /* Case 5: operand is defined by loop-header phi - induction. */
1834 case vect_induction_def:
1836 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1838 /* Get the def from the vectorized stmt. */
1839 def_stmt_info = vinfo_for_stmt (def_stmt);
1840 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1841 gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
1842 vec_oprnd = PHI_RESULT (vec_stmt);
1852 /* Function vect_get_vec_def_for_stmt_copy
1854 Return a vector-def for an operand. This function is used when the
1855 vectorized stmt to be created (by the caller to this function) is a "copy"
1856 created in case the vectorized result cannot fit in one vector, and several
1857 copies of the vector-stmt are required. In this case the vector-def is
1858 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1859 of the stmt that defines VEC_OPRND.
1860 DT is the type of the vector def VEC_OPRND.
1863 In case the vectorization factor (VF) is bigger than the number
1864 of elements that can fit in a vectype (nunits), we have to generate
1865 more than one vector stmt to vectorize the scalar stmt. This situation
1866 arises when there are multiple data-types operated upon in the loop; the
1867 smallest data-type determines the VF, and as a result, when vectorizing
1868 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1869 vector stmt (each computing a vector of 'nunits' results, and together
1870 computing 'VF' results in each iteration). This function is called when
1871 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1872 which VF=16 and nunits=4, so the number of copies required is 4):
1874 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1876 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1877 VS1.1: vx.1 = memref1 VS1.2
1878 VS1.2: vx.2 = memref2 VS1.3
1879 VS1.3: vx.3 = memref3
1881 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1882 VSnew.1: vz1 = vx.1 + ... VSnew.2
1883 VSnew.2: vz2 = vx.2 + ... VSnew.3
1884 VSnew.3: vz3 = vx.3 + ...
1886 The vectorization of S1 is explained in vectorizable_load.
1887 The vectorization of S2:
1888 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1889 the function 'vect_get_vec_def_for_operand' is called to
1890 get the relevant vector-def for each operand of S2. For operand x it
1891 returns the vector-def 'vx.0'.
1893 To create the remaining copies of the vector-stmt (VSnew.j), this
1894 function is called to get the relevant vector-def for each operand. It is
1895 obtained from the respective VS1.j stmt, which is recorded in the
1896 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1898 For example, to obtain the vector-def 'vx.1' in order to create the
1899 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1900 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1901 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1902 and return its def ('vx.1').
1903 Overall, to create the above sequence this function will be called 3 times:
1904 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1905 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1906 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1909 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1911 tree vec_stmt_for_operand;
1912 stmt_vec_info def_stmt_info;
1914 /* Do nothing; can reuse same def. */
1915 if (dt == vect_invariant_def || dt == vect_constant_def )
     /* Follow the RELATED_STMT link from the stmt that defines VEC_OPRND to
        the next copy, and take that copy's def (its operand 0). Both the
        stmt info and the link are asserted to exist. */
1918 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1919 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1920 gcc_assert (def_stmt_info);
1921 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1922 gcc_assert (vec_stmt_for_operand);
1923 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1928 /* Get vectorized definitions for the operands to create a copy of an original
1929 stmt. See vect_get_vec_def_for_stmt_copy() for details.
     DT[0] and DT[1] are the def types of the first and second operand. The
     last element of each operand vector is popped and the def of its next
     copy is pushed in its place. */
1932 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1933 VEC(tree,heap) **vec_oprnds0,
1934 VEC(tree,heap) **vec_oprnds1)
1936 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1938 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1939 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
     /* Same for the second operand, when a second operand vector exists. */
1941 if (vec_oprnds1 && *vec_oprnds1)
1943 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1944 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1945 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1950 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
     /* With an SLP_NODE the work is delegated to vect_get_slp_defs. For the
        plain operands, each operand vector is allocated for, and receives,
        a single def obtained from vect_get_vec_def_for_operand. */
1953 vect_get_vec_defs (tree op0, tree op1, tree stmt, VEC(tree,heap) **vec_oprnds0,
1954 VEC(tree,heap) **vec_oprnds1, slp_tree slp_node)
1957 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1962 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1963 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1964 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1968 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1969 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1970 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1976 /* Function vect_finish_stmt_generation.
1978 Insert a new stmt.
     VEC_STMT is inserted before STMT, which BSI must point to (asserted
     both before and after the insertion) and which must not be a
     LABEL_EXPR. A fresh stmt_vec_info is attached to VEC_STMT, and STMT's
     source location is copied onto it. */
1981 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1982 block_stmt_iterator *bsi)
1984 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1985 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1987 gcc_assert (stmt == bsi_stmt (*bsi));
1988 gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);
1990 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1992 set_stmt_info (get_stmt_ann (vec_stmt),
1993 new_stmt_vec_info (vec_stmt, loop_vinfo));
1995 if (vect_print_dump_info (REPORT_DETAILS))
1997 fprintf (vect_dump, "add new stmt: ");
1998 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2001 /* Make sure bsi points to the stmt that is being vectorized. */
2002 gcc_assert (stmt == bsi_stmt (*bsi));
     /* Give the new stmt the source location of the scalar stmt, using
        whichever location representation the build is configured for. */
2004 #ifdef USE_MAPPED_LOCATION
2005 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
2007 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
2012 /* Function get_initial_def_for_reduction
2015 STMT - a stmt that performs a reduction operation in the loop.
2016 INIT_VAL - the initial value of the reduction variable
2019 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2020 of the reduction (used for adjusting the epilog - see below).
2021 Return a vector variable, initialized according to the operation that STMT
2022 performs. This vector will be used as the initial value of the
2023 vector of partial results.
2025 Option1 (adjust in epilog): Initialize the vector as follows:
2028 min/max: [init_val,init_val,..,init_val,init_val]
2029 bit and/or: [init_val,init_val,..,init_val,init_val]
2030 and when necessary (e.g. add/mult case) let the caller know
2031 that it needs to adjust the result by init_val.
2033 Option2: Initialize the vector as follows:
2034 add: [0,0,...,0,init_val]
2035 mult: [1,1,...,1,init_val]
2036 min/max: [init_val,init_val,...,init_val]
2037 bit and/or: [init_val,init_val,...,init_val]
2038 and no adjustments are needed.
2040 For example, for the following code:
2046 STMT is 's = s + a[i]', and the reduction variable is 's'.
2047 For a vector of 4 units, we want to return either [0,0,0,init_val],
2048 or [0,0,0,0] and let the caller know that it needs to adjust
2049 the result at the end by 'init_val'.
2051 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2052 initialization vector is simpler (same element in all entries).
2053 A cost model should help decide between these two schemes. */
2056 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
2058 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2059 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2060 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2061 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2062 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2063 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2064 tree type = TREE_TYPE (init_val);
2071 bool nested_in_vect_loop = false;
     /* Only integral and scalar floating-point reduction types are handled. */
2073 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
2074 if (nested_in_vect_loop_p (loop, stmt))
2075 nested_in_vect_loop = true;
2077 gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);
2079 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
2083 case WIDEN_SUM_EXPR:
     /* The caller adjusts the final result by this value: the vector def
        VECDEF when nested in the vectorized loop, the scalar INIT_VAL
        otherwise. */
2086 if (nested_in_vect_loop)
2087 *adjustment_def = vecdef;
2089 *adjustment_def = init_val;
2090 /* Create a vector of zeros for init_def. */
2091 if (INTEGRAL_TYPE_P (type))
2092 def_for_init = build_int_cst (type, 0);
2094 def_for_init = build_real (type, dconst0);
2095 for (i = nunits - 1; i >= 0; --i)
2096 t = tree_cons (NULL_TREE, def_for_init, t);
2097 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
2098 init_def = build_vector (vector_type, t);
     /* No adjustment value is reported to the caller in this case. */
2103 *adjustment_def = NULL_TREE;
2115 /* Function vect_create_epilog_for_reduction
2117 Create code at the loop-epilog to finalize the result of a reduction
2120 VECT_DEF is a vector of partial results.
2121 REDUC_CODE is the tree-code for the epilog reduction.
2122 STMT is the scalar reduction stmt that is being vectorized.
2123 REDUCTION_PHI is the phi-node that carries the reduction computation.
2126 1. Creates the reduction def-use cycle: sets the arguments for
2128 The loop-entry argument is the vectorized initial-value of the reduction.
2129 The loop-latch argument is VECT_DEF - the vector of partial sums.
2130 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2131 by applying the operation specified by REDUC_CODE if available, or by
2132 other means (whole-vector shifts or a scalar loop).
2133 The function also creates a new phi node at the loop exit to preserve
2134 loop-closed form, as illustrated below.
2136 The flow at the entry to this function:
2139 vec_def = phi <null, null> # REDUCTION_PHI
2140 VECT_DEF = vector_stmt # vectorized form of STMT
2141 s_loop = scalar_stmt # (scalar) STMT
2143 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2147 The above is transformed by this function into:
2150 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2151 VECT_DEF = vector_stmt # vectorized form of STMT
2152 s_loop = scalar_stmt # (scalar) STMT
2154 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2155 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2156 v_out2 = reduce <v_out1>
2157 s_out3 = extract_field <v_out2, 0>
2158 s_out4 = adjust_result <s_out3>
2164 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
2165 enum tree_code reduc_code, tree reduction_phi)
2167 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2169 enum machine_mode mode;
2170 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2171 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2172 basic_block exit_bb;
2176 block_stmt_iterator exit_bsi;
2178 tree new_temp = NULL_TREE;
2180 tree epilog_stmt = NULL_TREE;
2181 tree new_scalar_dest, exit_phi, new_dest;
2182 tree bitsize, bitpos, bytesize;
2183 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2184 tree adjustment_def;
2185 tree vec_initial_def;
2187 imm_use_iterator imm_iter;
2188 use_operand_p use_p;
2189 bool extract_scalar_result = false;
2190 tree reduction_op, expr;
2193 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
2194 bool nested_in_vect_loop = false;
2196 VEC(tree,heap) *phis = NULL;
2199 if (nested_in_vect_loop_p (loop, stmt))
2202 nested_in_vect_loop = true;
2205 op_type = TREE_OPERAND_LENGTH (operation);
2206 reduction_op = TREE_OPERAND (operation, op_type-1);
2207 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2208 mode = TYPE_MODE (vectype);
2210 /*** 1. Create the reduction def-use cycle ***/
2212 /* 1.1 set the loop-entry arg of the reduction-phi: */
2213 /* For the case of reduction, vect_get_vec_def_for_operand returns
2214 the scalar def before the loop, that defines the initial value
2215 of the reduction variable. */
2216 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
2218 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
2220 /* 1.2 set the loop-latch arg for the reduction-phi: */
2221 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
2223 if (vect_print_dump_info (REPORT_DETAILS))
2225 fprintf (vect_dump, "transform reduction: created def-use cycle:");
2226 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
2227 fprintf (vect_dump, "\n");
2228 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
2232 /*** 2. Create epilog code
2233 The reduction epilog code operates across the elements of the vector
2234 of partial results computed by the vectorized loop.
2235 The reduction epilog code consists of:
2236 step 1: compute the scalar result in a vector (v_out2)
2237 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2238 step 3: adjust the scalar result (s_out3) if needed.
2240 Step 1 can be accomplished using one of the following three schemes:
2241 (scheme 1) using reduc_code, if available.
2242 (scheme 2) using whole-vector shifts, if available.
2243 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2246 The overall epilog code looks like this:
2248 s_out0 = phi <s_loop> # original EXIT_PHI
2249 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2250 v_out2 = reduce <v_out1> # step 1
2251 s_out3 = extract_field <v_out2, 0> # step 2
2252 s_out4 = adjust_result <s_out3> # step 3
2254 (step 3 is optional, and steps 1 and 2 may be combined).
2255 Lastly, the uses of s_out0 are replaced by s_out4.
2259 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2260 v_out1 = phi <v_loop> */
2262 exit_bb = single_exit (loop)->dest;
2263 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
2264 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
/* All epilog stmts created below are inserted through EXIT_BSI, which is
   positioned in the exit block after its labels.  */
2265 exit_bsi = bsi_after_labels (exit_bb);
2267 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2268 (i.e. when reduc_code is not available) and in the final adjustment
2269 code (if needed). Also get the original scalar reduction variable as
2270 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
2271 represents a reduction pattern), the tree-code and scalar-def are
2272 taken from the original stmt that the pattern-stmt (STMT) replaces.
2273 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2274 are taken from STMT. */
2276 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2279 /* Regular reduction */
2284 /* Reduction pattern */
2285 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
2286 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2287 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2289 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2290 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
2291 scalar_type = TREE_TYPE (scalar_dest);
2292 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
2293 bitsize = TYPE_SIZE (scalar_type);
2294 bytesize = TYPE_SIZE_UNIT (scalar_type);
2297 /* In case this is a reduction in an inner-loop while vectorizing an outer
2298 loop - we don't need to extract a single scalar result at the end of the
2299 inner-loop. The final vector of partial results will be used in the
2300 vectorized outer-loop, or reduced to a scalar result at the end of the
2302 if (nested_in_vect_loop)
2303 goto vect_finalize_reduction;
2305 /* 2.3 Create the reduction code, using one of the three schemes described
2308 if (reduc_code < NUM_TREE_CODES)
2312 /*** Case 1: Create:
2313 v_out2 = reduc_expr <v_out1> */
2315 if (vect_print_dump_info (REPORT_DETAILS))
2316 fprintf (vect_dump, "Reduce using direct vector reduction.");
2318 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2319 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
2320 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2321 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2322 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2323 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2325 extract_scalar_result = true;
2329 enum tree_code shift_code = 0;
2330 bool have_whole_vector_shift = true;
2332 int element_bitsize = tree_low_cst (bitsize, 1);
2333 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
/* The shift scheme needs a vec_shr pattern for MODE on the target.  */
2336 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
2337 shift_code = VEC_RSHIFT_EXPR;
2339 have_whole_vector_shift = false;
2341 /* Regardless of whether we have a whole vector shift, if we're
2342 emulating the operation via tree-vect-generic, we don't want
2343 to use it. Only the first round of the reduction is likely
2344 to still be profitable via emulation. */
2345 /* ??? It might be better to emit a reduction tree code here, so that
2346 tree-vect-generic can expand the first round via bit tricks. */
2347 if (!VECTOR_MODE_P (mode))
2348 have_whole_vector_shift = false;
/* The vector form of CODE itself must also be supported in MODE, since the
   shift scheme applies it after every shift.  */
2351 optab optab = optab_for_tree_code (code, vectype);
2352 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
2353 have_whole_vector_shift = false;
2356 if (have_whole_vector_shift)
2358 /*** Case 2: Create:
2359 for (offset = VS/2; offset >= element_size; offset/=2)
2361 Create: va' = vec_shift <va, offset>
2362 Create: va = vop <va, va'>
2365 if (vect_print_dump_info (REPORT_DETAILS))
2366 fprintf (vect_dump, "Reduce using vector shifts");
2368 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2369 new_temp = PHI_RESULT (new_phi);
2371 for (bit_offset = vec_size_in_bits/2;
2372 bit_offset >= element_bitsize;
2375 tree bitpos = size_int (bit_offset);
2376 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
2377 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2378 new_name = make_ssa_name (vec_dest, epilog_stmt);
2379 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2380 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2382 tmp = build2 (code, vectype, new_name, new_temp);
2383 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2384 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2385 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2386 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2389 extract_scalar_result = true;
2395 /*** Case 3: Create:
2396 s = extract_field <v_out2, 0>
2397 for (offset = element_size;
2398 offset < vector_size;
2399 offset += element_size;)
2401 Create: s' = extract_field <v_out2, offset>
2402 Create: s = op <s, s'>
2405 if (vect_print_dump_info (REPORT_DETAILS))
2406 fprintf (vect_dump, "Reduce using scalar code. ");
2408 vec_temp = PHI_RESULT (new_phi);
2409 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2410 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2412 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2413 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2414 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2415 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2416 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2418 for (bit_offset = element_bitsize;
2419 bit_offset < vec_size_in_bits;
2420 bit_offset += element_bitsize)
2423 tree bitpos = bitsize_int (bit_offset);
2424 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2427 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2428 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2429 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
2430 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2431 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2433 tmp = build2 (code, scalar_type, new_name, new_temp);
2434 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
2435 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2436 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2437 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2440 extract_scalar_result = false;
2444 /* 2.4 Extract the final scalar result. Create:
2445 s_out3 = extract_field <v_out2, bitpos> */
2447 if (extract_scalar_result)
2451 gcc_assert (!nested_in_vect_loop);
2452 if (vect_print_dump_info (REPORT_DETAILS))
2453 fprintf (vect_dump, "extract scalar result");
/* The result is read from the last element on big-endian targets and from
   bit position zero otherwise.  */
2455 if (BYTES_BIG_ENDIAN)
2456 bitpos = size_binop (MULT_EXPR,
2457 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
2458 TYPE_SIZE (scalar_type));
2460 bitpos = bitsize_zero_node;
2462 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
2463 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2464 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2465 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2466 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2467 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2470 vect_finalize_reduction:
2472 /* 2.5 Adjust the final result by the initial value of the reduction
2473 variable. (When such adjustment is not needed, then
2474 'adjustment_def' is zero). For example, if code is PLUS we create:
2475 new_temp = loop_exit_def + adjustment_def */
/* When nested, the adjustment is applied to the whole vector of partial
   results; otherwise it is applied to the extracted scalar.  */
2479 if (nested_in_vect_loop)
2481 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
2482 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
2483 new_dest = vect_create_destination_var (scalar_dest, vectype);
2487 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
2488 expr = build2 (code, scalar_type, new_temp, adjustment_def);
2489 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
2491 epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
2492 new_temp = make_ssa_name (new_dest, epilog_stmt);
2493 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2494 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2498 /* 2.6 Handle the loop-exit phi */
2500 /* Replace uses of s_out0 with uses of the final epilog result (s_out4,
2501 or s_out3 when no adjustment is needed):
2501 Find the loop-closed-use at the loop exit of the original scalar result.
2502 (The reduction result is expected to have two immediate uses - one at the
2503 latch block, and one at the loop exit). */
/* NOTE(review): room for 10 entries is reserved here and the loop below
   pushes with VEC_quick_push, which presumably does not grow the vector --
   confirm that more than 10 out-of-loop uses cannot occur.  */
2504 phis = VEC_alloc (tree, heap, 10);
2505 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
2507 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
2509 exit_phi = USE_STMT (use_p);
2510 VEC_quick_push (tree, phis, exit_phi);
2513 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
2514 gcc_assert (!VEC_empty (tree, phis));
2516 for (i = 0; VEC_iterate (tree, phis, i, exit_phi); i++)
2518 if (nested_in_vect_loop)
2520 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
2522 /* FORNOW. Currently not supporting the case that an inner-loop reduction
2523 is not used in the outer-loop (but only outside the outer-loop). */
2524 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2525 && !STMT_VINFO_LIVE_P (stmt_vinfo));
/* The vectorized def recorded for the exit phi is the adjustment stmt when
   one was created, and the new vector exit phi otherwise.  */
2527 epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
2528 STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
2529 set_stmt_info (get_stmt_ann (epilog_stmt),
2530 new_stmt_vec_info (epilog_stmt, loop_vinfo));
2534 /* Replace the uses: */
2535 orig_name = PHI_RESULT (exit_phi);
2536 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
2537 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2538 SET_USE (use_p, new_temp);
2540 VEC_free (tree, heap, phis);
2544 /* Function vectorizable_reduction.
2546 Check if STMT performs a reduction operation that can be vectorized.
2547 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2548 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2549 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2551 This function also handles reduction idioms (patterns) that have been
2552 recognized in advance during vect_pattern_recog. In this case, STMT may be
2554 X = pattern_expr (arg0, arg1, ..., X)
2555 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
2556 sequence that had been detected and replaced by the pattern-stmt (STMT).
2558 In some cases of reduction patterns, the type of the reduction variable X is
2559 different than the type of the other arguments of STMT.
2560 In such cases, the vectype that is used when transforming STMT into a vector
2561 stmt is different than the vectype that is used to determine the
2562 vectorization factor, because it consists of a different number of elements
2563 than the actual number of elements that are being operated upon in parallel.
2565 For example, consider an accumulation of shorts into an int accumulator.
2566 On some targets it's possible to vectorize this pattern operating on 8
2567 shorts at a time (hence, the vectype for purposes of determining the
2568 vectorization factor should be V8HI); on the other hand, the vectype that
2569 is used to create the vector form is actually V4SI (the type of the result).
2571 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2572 indicates what is the actual level of parallelism (V8HI in the example), so
2573 that the right vectorization factor would be derived. This vectype
2574 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2575 be used to create the vectorized stmt. The right vectype for the vectorized
2576 stmt is obtained from the type of the result X:
2577 get_vectype_for_scalar_type (TREE_TYPE (X))
2579 This means that, contrary to "regular" reductions (or "regular" stmts in
2580 general), the following equation:
2581 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2582 does *NOT* necessarily hold for reduction patterns. */
2585 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2590 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2591 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2592 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2593 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2594 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2596 enum tree_code code, orig_code, epilog_reduc_code = 0;
2597 enum machine_mode vec_mode;
2599 optab optab, reduc_optab;
2600 tree new_temp = NULL_TREE;
2602 enum vect_def_type dt;
2607 stmt_vec_info orig_stmt_info;
2608 tree expr = NULL_TREE;
2610 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* Number of vector stmts generated for STMT: the vectorization factor
   divided by the number of elements that fit in VECTYPE.  */
2611 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2612 stmt_vec_info prev_stmt_info;
2614 tree new_stmt = NULL_TREE;
2617 if (nested_in_vect_loop_p (loop, stmt))
2620 /* FORNOW. This restriction should be relaxed. */
2623 if (vect_print_dump_info (REPORT_DETAILS))
2624 fprintf (vect_dump, "multiple types in nested loop.");
2629 gcc_assert (ncopies >= 1);
2631 /* FORNOW: SLP not supported. */
2632 if (STMT_SLP_TYPE (stmt_info))
2635 /* 1. Is vectorizable reduction? */
2637 /* Not supportable if the reduction variable is used in the loop. */
2638 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
2641 /* Reductions that are not used even in an enclosing outer-loop,
2642 are expected to be "live" (used out of the loop). */
2643 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
2644 && !STMT_VINFO_LIVE_P (stmt_info))
2647 /* Make sure it was already recognized as a reduction computation. */
2648 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2651 /* 2. Has this been recognized as a reduction pattern?
2653 Check if STMT represents a pattern that has been recognized
2654 in earlier analysis stages. For stmts that represent a pattern,
2655 the STMT_VINFO_RELATED_STMT field records the last stmt in
2656 the original sequence that constitutes the pattern. */
2658 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2661 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2662 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2663 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2664 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2667 /* 3. Check the operands of the operation. The first operands are defined
2668 inside the loop body. The last operand is the reduction variable,
2669 which is defined by the loop-header-phi. */
2671 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2673 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2674 code = TREE_CODE (operation);
2675 op_type = TREE_OPERAND_LENGTH (operation);
2676 if (op_type != binary_op && op_type != ternary_op)
2678 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2679 scalar_type = TREE_TYPE (scalar_dest);
2681 /* All uses but the last are expected to be defined in the loop.
2682 The last use is the reduction variable. */
2683 for (i = 0; i < op_type-1; i++)
2685 op = TREE_OPERAND (operation, i);
2686 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2687 gcc_assert (is_simple_use);
2688 if (dt != vect_loop_def
2689 && dt != vect_invariant_def
2690 && dt != vect_constant_def
2691 && dt != vect_induction_def)
/* After the loop I indexes the last operand, i.e. the reduction variable;
   its def must be the reduction phi.  */
2695 op = TREE_OPERAND (operation, i);
2696 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2697 gcc_assert (is_simple_use);
2698 gcc_assert (dt == vect_reduction_def);
2699 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
/* The reduction stmt found from the phi must be the original stmt of the
   pattern, or STMT itself for a regular reduction.  */
2701 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2703 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2705 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2708 /* 4. Supportable by target? */
2710 /* 4.1. check support for the operation in the loop */
2711 optab = optab_for_tree_code (code, vectype);
2714 if (vect_print_dump_info (REPORT_DETAILS))
2715 fprintf (vect_dump, "no optab.");
2718 vec_mode = TYPE_MODE (vectype);
2719 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
2721 if (vect_print_dump_info (REPORT_DETAILS))
2722 fprintf (vect_dump, "op not supported by target.");
2723 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2724 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2725 < vect_min_worthwhile_factor (code))
2727 if (vect_print_dump_info (REPORT_DETAILS))
2728 fprintf (vect_dump, "proceeding using word mode.");
2731 /* Worthwhile without SIMD support? */
2732 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2733 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2734 < vect_min_worthwhile_factor (code))
2736 if (vect_print_dump_info (REPORT_DETAILS))
2737 fprintf (vect_dump, "not worthwhile without SIMD support.");
2741 /* 4.2. Check support for the epilog operation.
2743 If STMT represents a reduction pattern, then the type of the
2744 reduction variable may be different than the type of the rest
2745 of the arguments. For example, consider the case of accumulation
2746 of shorts into an int accumulator; The original code:
2747 S1: int_a = (int) short_a;
2748 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2751 STMT: int_acc = widen_sum <short_a, int_acc>
2754 1. The tree-code that is used to create the vector operation in the
2755 epilog code (that reduces the partial results) is not the
2756 tree-code of STMT, but is rather the tree-code of the original
2757 stmt from the pattern that STMT is replacing. I.e., in the example
2758 above we want to use 'widen_sum' in the loop, but 'plus' in the
2760 2. The type (mode) we use to check available target support
2761 for the vector operation to be created in the *epilog*, is
2762 determined by the type of the reduction variable (in the example
2763 above we'd check this: plus_optab[vect_int_mode]).
2764 However the type (mode) we use to check available target support
2765 for the vector operation to be created *inside the loop*, is
2766 determined by the type of the other arguments to STMT (in the
2767 example we'd check this: widen_sum_optab[vect_short_mode]).
2769 This is contrary to "regular" reductions, in which the types of all
2770 the arguments are the same as the type of the reduction variable.
2771 For "regular" reductions we can therefore use the same vector type
2772 (and also the same tree-code) when generating the epilog code and
2773 when generating the code inside the loop. */
2777 /* This is a reduction pattern: get the vectype from the type of the
2778 reduction variable, and get the tree-code from orig_stmt. */
2779 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2780 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2781 vec_mode = TYPE_MODE (vectype);
2785 /* Regular reduction: the same vectype and tree-code as used for
2786 the vector code inside the loop can be used for the epilog code. */
/* When no direct reduction code is usable, EPILOG_REDUC_CODE is set to
   NUM_TREE_CODES below; vect_create_epilog_for_reduction tests for
   reduc_code < NUM_TREE_CODES and otherwise falls back to whole-vector
   shifts or scalar code.  */
2790 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2792 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2795 if (vect_print_dump_info (REPORT_DETAILS))
2796 fprintf (vect_dump, "no optab for reduction.");
2797 epilog_reduc_code = NUM_TREE_CODES;
2799 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
2801 if (vect_print_dump_info (REPORT_DETAILS))
2802 fprintf (vect_dump, "reduc op not supported by target.");
2803 epilog_reduc_code = NUM_TREE_CODES;
2806 if (!vec_stmt) /* transformation not required. */
/* Analysis only: record the stmt kind and account for its cost.  */
2808 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2809 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2815 if (vect_print_dump_info (REPORT_DETAILS))
2816 fprintf (vect_dump, "transform reduction.");
2818 /* Create the destination vector */
2819 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2821 /* Create the reduction-phi that defines the reduction-operand. */
2822 new_phi = create_phi_node (vec_dest, loop->header);
2824 /* In case the vectorization factor (VF) is bigger than the number
2825 of elements that we can fit in a vectype (nunits), we have to generate
2826 more than one vector stmt - i.e - we need to "unroll" the
2827 vector stmt by a factor VF/nunits. For more details see documentation
2828 in vectorizable_operation. */
2830 prev_stmt_info = NULL;
2831 for (j = 0; j < ncopies; j++)
2836 op = TREE_OPERAND (operation, 0);
2837 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2838 if (op_type == ternary_op)
2840 op = TREE_OPERAND (operation, 1);
2841 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2844 /* Get the vector def for the reduction variable from the phi node */
2845 reduc_def = PHI_RESULT (new_phi);
2849 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2850 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2851 if (op_type == ternary_op)
2852 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2854 /* Get the vector def for the reduction variable from the vectorized
2855 reduction operation generated in the previous iteration (j-1) */
2856 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2859 /* Arguments are ready. create the new vector stmt. */
2860 if (op_type == binary_op)
2861 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2863 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2865 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2866 new_temp = make_ssa_name (vec_dest, new_stmt);
2867 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2868 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* The first copy is recorded as STMT's vectorized stmt; each later copy is
   linked to the previous one through STMT_VINFO_RELATED_STMT.  */
2871 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2873 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2874 prev_stmt_info = vinfo_for_stmt (new_stmt);
2877 /* Finalize the reduction-phi (set its arguments) and create the
2878 epilog reduction code. */
2879 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2883 /* Checks if CALL can be vectorized with result vector type VECTYPE_OUT
2884 and argument vector type VECTYPE_IN. Returns a function declaration if
2885 the target has a vectorized version of the function, or NULL_TREE if the
2885 function cannot be vectorized. */
2888 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2890 tree fndecl = get_callee_fndecl (call);
2891 enum built_in_function code;
2893 /* We only handle functions that do not read or clobber memory -- i.e.
2894 const or novops ones. */
2895 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
/* The callee must be a FUNCTION_DECL that is a built-in.
   NOTE(review): the start of this condition and the statement it guards are
   not visible in this excerpt; presumably the call is rejected otherwise.  */
2899 || TREE_CODE (fndecl) != FUNCTION_DECL
2900 || !DECL_BUILT_IN (fndecl))
/* Ask the target hook for a vectorized variant of this built-in.  */
2903 code = DECL_FUNCTION_CODE (fndecl);
2904 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2908 /* Function vectorizable_call.
2910 Check if STMT performs a function call that can be vectorized.
2911 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2912 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2913 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2916 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2922 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2924 tree vectype_out, vectype_in;
2927 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2928 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2929 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2930 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2932 int ncopies, j, nargs;
2933 call_expr_arg_iterator iter;
2935 enum { NARROW, NONE, WIDEN } modifier;
2937 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2940 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2943 /* FORNOW: SLP not supported. */
2944 if (STMT_SLP_TYPE (stmt_info))
2947 /* FORNOW: not yet supported. */
2948 if (STMT_VINFO_LIVE_P (stmt_info))
2950 if (vect_print_dump_info (REPORT_DETAILS))
2951 fprintf (vect_dump, "value used after loop.");
2955 /* Is STMT a vectorizable call? */
2956 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2959 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2962 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2963 if (TREE_CODE (operation) != CALL_EXPR)
2966 /* Process function arguments. */
2967 rhs_type = NULL_TREE;
2969 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2971 /* Bail out if the function has more than two arguments, we
2972 do not have interesting builtin functions to vectorize with
2973 more than two arguments. */
2977 /* We can only handle calls with arguments of the same type. */
2979 && rhs_type != TREE_TYPE (op))
2981 if (vect_print_dump_info (REPORT_DETAILS))
2982 fprintf (vect_dump, "argument types differ.");
2985 rhs_type = TREE_TYPE (op);
2987 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
2989 if (vect_print_dump_info (REPORT_DETAILS))
2990 fprintf (vect_dump, "use not simple.");
2997 /* No arguments is also not good. */
3001 vectype_in = get_vectype_for_scalar_type (rhs_type);
3002 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3004 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
3005 vectype_out = get_vectype_for_scalar_type (lhs_type);
3006 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3009 if (nunits_in == nunits_out / 2)
3011 else if (nunits_out == nunits_in)
3013 else if (nunits_out == nunits_in / 2)
3018 /* For now, we only vectorize functions if a target specific builtin
3019 is available. TODO -- in some cases, it might be profitable to
3020 insert the calls for pieces of the vector, in order to be able
3021 to vectorize other operations in the loop. */
3022 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
3023 if (fndecl == NULL_TREE)
3025 if (vect_print_dump_info (REPORT_DETAILS))
3026 fprintf (vect_dump, "function is not vectorizable.");
3031 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
3033 if (modifier == NARROW)
3034 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3036 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3038 /* Sanity check: make sure that at least one copy of the vectorized stmt
3039 needs to be generated. */
3040 gcc_assert (ncopies >= 1);
3042 /* FORNOW. This restriction should be relaxed. */
3043 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3045 if (vect_print_dump_info (REPORT_DETAILS))
3046 fprintf (vect_dump, "multiple types in nested loop.");
3050 if (!vec_stmt) /* transformation not required. */
3052 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3053 if (vect_print_dump_info (REPORT_DETAILS))
3054 fprintf (vect_dump, "=== vectorizable_call ===");
3055 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3061 if (vect_print_dump_info (REPORT_DETAILS))
3062 fprintf (vect_dump, "transform operation.");
3064 /* FORNOW. This restriction should be relaxed. */
3065 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3067 if (vect_print_dump_info (REPORT_DETAILS))
3068 fprintf (vect_dump, "multiple types in nested loop.");
3073 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3074 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3076 prev_stmt_info = NULL;
3080 for (j = 0; j < ncopies; ++j)
3082 /* Build argument list for the vectorized call. */
3083 /* FIXME: Rewrite this so that it doesn't
3084 construct a temporary list. */
3087 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3091 = vect_get_vec_def_for_operand (op, stmt, NULL);
3094 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3096 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3100 vargs = nreverse (vargs);
3102 rhs = build_function_call_expr (fndecl, vargs);
3103 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3104 new_temp = make_ssa_name (vec_dest, new_stmt);
3105 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3107 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3110 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3112 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3114 prev_stmt_info = vinfo_for_stmt (new_stmt);
3120 for (j = 0; j < ncopies; ++j)
3122 /* Build argument list for the vectorized call. */
3123 /* FIXME: Rewrite this so that it doesn't
3124 construct a temporary list. */
3127 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3132 = vect_get_vec_def_for_operand (op, stmt, NULL);
3134 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3139 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
3141 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3144 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3145 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
3149 vargs = nreverse (vargs);
3151 rhs = build_function_call_expr (fndecl, vargs);
3152 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3153 new_temp = make_ssa_name (vec_dest, new_stmt);
3154 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3156 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3159 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3161 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3163 prev_stmt_info = vinfo_for_stmt (new_stmt);
3166 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3171 /* No current target implements this case. */
3175 /* The call in STMT might prevent it from being removed in dce.
3176 We however cannot remove it here, due to the way the ssa name
3177 it defines is mapped to the new definition. So just replace
3178 rhs of the statement with something harmless. */
3179 type = TREE_TYPE (scalar_dest);
3180 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
3187 /* Function vect_gen_widened_results_half
3189 Create a vector stmt whose code, type, number of arguments, and result
3190 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
3191 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3192 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3193 needs to be created (DECL is a function-decl of a target-builtin).
3194 STMT is the original scalar stmt that we are vectorizing. */
/* VEC_OPRND1 is only read on the paths guarded by OP_TYPE == binary_op;
   the unary paths use VEC_OPRND0 alone.
   NOTE(review): the return statement is not visible in this excerpt.  The
   callers in vectorizable_conversion assign the result, presumably the
   newly created stmt -- confirm.  */
3197 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
3198 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3199 tree vec_dest, block_stmt_iterator *bsi,
3208 /* Generate half of the widened result: */
3209 if (code == CALL_EXPR)
3211 /* Target specific support */
/* Build a call to DECL taking both operand vectors.  */
3212 if (op_type == binary_op)
3213 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
/* Build a call to DECL taking only the first operand vector.  */
3215 expr = build_call_expr (decl, 1, vec_oprnd0);
3219 /* Generic support */
/* The arity passed in must match the arity of the tree code itself.  */
3220 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3221 if (op_type == binary_op)
3222 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
3224 expr = build1 (code, vectype, vec_oprnd0);
/* Wrap EXPR in an assignment to VEC_DEST, replace the lhs with a fresh
   SSA name defined by that assignment, and hand the stmt to
   vect_finish_stmt_generation together with STMT and BSI.  */
3226 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3227 new_temp = make_ssa_name (vec_dest, new_stmt);
3228 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3229 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* For a call, walk every virtual operand of the new stmt, reduce SSA
   names to their underlying variable, and mark that symbol for
   renaming.  */
3231 if (code == CALL_EXPR)
3233 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
3235 if (TREE_CODE (sym) == SSA_NAME)
3236 sym = SSA_NAME_VAR (sym);
3237 mark_sym_for_renaming (sym);
3245 /* Check if STMT performs a conversion operation that can be vectorized.
3246 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3247 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3248 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* The conversion codes recognised are FIX_TRUNC_EXPR and FLOAT_EXPR.
   Three shapes are distinguished by comparing the element counts of the
   input and output vector types (MODIFIER: NARROW, NONE, WIDEN), and each
   shape has its own target-support query and its own generation loop.
   When VEC_STMT is null only the analysis part runs and STMT_VINFO_TYPE is
   set to type_conversion_vec_info_type.  SLP_NODE is forwarded to
   vect_get_vec_defs and, in the same-size case, receives the generated
   stmts.
   NOTE(review): vec_oprnds0 is allocated below, but no VEC_free of it is
   visible in this function, unlike vectorizable_assignment and
   vectorizable_operation -- confirm it is not leaked.  */
3251 vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
3252 tree *vec_stmt, slp_tree slp_node)
3258 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3259 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3260 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3261 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3262 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3263 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3266 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3267 tree new_stmt = NULL_TREE;
3268 stmt_vec_info prev_stmt_info;
3271 tree vectype_out, vectype_in;
3274 tree rhs_type, lhs_type;
3276 enum { NARROW, NONE, WIDEN } modifier;
3278 VEC(tree,heap) *vec_oprnds0 = NULL;
3281 /* Is STMT a vectorizable conversion? */
3283 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3286 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3289 if (STMT_VINFO_LIVE_P (stmt_info))
3291 /* FORNOW: not yet supported. */
3292 if (vect_print_dump_info (REPORT_DETAILS))
3293 fprintf (vect_dump, "value used after loop.");
/* STMT must be an assignment whose lhs is an SSA name.  */
3297 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3300 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3303 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3304 code = TREE_CODE (operation);
3305 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3308 /* Check types of lhs and rhs. */
3309 op0 = TREE_OPERAND (operation, 0);
3310 rhs_type = TREE_TYPE (op0);
3311 vectype_in = get_vectype_for_scalar_type (rhs_type);
3314 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3316 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3317 lhs_type = TREE_TYPE (scalar_dest);
3318 vectype_out = get_vectype_for_scalar_type (lhs_type);
3321 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Classify by element counts: output has twice as many elements as the
   input, the same number, or half as many.  The assignments to MODIFIER
   are not visible in this excerpt; presumably NARROW, NONE and WIDEN
   respectively -- confirm.  */
3324 if (nunits_in == nunits_out / 2)
3326 else if (nunits_out == nunits_in)
3328 else if (nunits_out == nunits_in / 2)
3333 if (modifier == NONE)
3334 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
3336 /* Bail out if the types are both integral or non-integral. */
3337 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
3338 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
/* Number of vector stmts to generate: the vectorization factor divided by
   the output element count for NARROW; the other visible formula divides
   by the input element count.  */
3341 if (modifier == NARROW)
3342 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3344 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3346 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3347 this, so we can safely override NCOPIES with 1 here. */
3351 /* Sanity check: make sure that at least one copy of the vectorized stmt
3352 needs to be generated. */
3353 gcc_assert (ncopies >= 1);
3355 /* FORNOW. This restriction should be relaxed. */
3356 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3358 if (vect_print_dump_info (REPORT_DETAILS))
3359 fprintf (vect_dump, "multiple types in nested loop.");
3363 /* Check the operands of the operation. */
3364 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3366 if (vect_print_dump_info (REPORT_DETAILS))
3367 fprintf (vect_dump, "use not simple.");
3371 /* Supportable by target? */
/* Each modifier has its own query: the builtin_conversion target hook for
   NONE, supportable_widening_operation for WIDEN, and
   supportable_narrowing_operation for NARROW.  */
3372 if ((modifier == NONE
3373 && !targetm.vectorize.builtin_conversion (code, vectype_in))
3374 || (modifier == WIDEN
3375 && !supportable_widening_operation (code, stmt, vectype_in,
3378 || (modifier == NARROW
3379 && !supportable_narrowing_operation (code, stmt, vectype_in,
3382 if (vect_print_dump_info (REPORT_DETAILS))
3383 fprintf (vect_dump, "op not supported by target.");
/* For a size-changing conversion the vectype recorded for the stmt is
   replaced by the input vector type.  */
3387 if (modifier != NONE)
3389 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3390 /* FORNOW: SLP not supported. */
3391 if (STMT_SLP_TYPE (stmt_info))
3395 if (!vec_stmt) /* transformation not required. */
3397 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3402 if (vect_print_dump_info (REPORT_DETAILS))
3403 fprintf (vect_dump, "transform conversion.");
3406 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
/* Outside SLP, the same-size case needs room for one operand vector.  */
3408 if (modifier == NONE && !slp_node)
3409 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3411 prev_stmt_info = NULL;
/* Same-size conversion: every operand vector becomes a one-argument call
   (presumably to the builtin decl queried just below -- the assignment is
   not visible here).  The call's virtual operands are marked for
   renaming.  */
3415 for (j = 0; j < ncopies; j++)
3421 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
3423 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3426 targetm.vectorize.builtin_conversion (code, vectype_in);
3427 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3429 new_stmt = build_call_expr (builtin_decl, 1, vop0);
3431 /* Arguments are ready. Create the new vector stmt. */
3432 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
3433 new_temp = make_ssa_name (vec_dest, new_stmt);
3434 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3435 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3436 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
3437 SSA_OP_ALL_VIRTUALS)
3439 if (TREE_CODE (sym) == SSA_NAME)
3440 sym = SSA_NAME_VAR (sym);
3441 mark_sym_for_renaming (sym);
/* Record the generated stmt in the SLP node's vector-stmt list.  */
3444 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Copies are chained: a generated stmt is stored in the stmt info's
   VEC_STMT (and *VEC_STMT), and the previously generated copy's
   RELATED_STMT is pointed at the new one.  */
3448 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3450 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3451 prev_stmt_info = vinfo_for_stmt (new_stmt);
3456 /* In case the vectorization factor (VF) is bigger than the number
3457 of elements that we can fit in a vectype (nunits), we have to
3458 generate more than one vector stmt - i.e - we need to "unroll"
3459 the vector stmt by a factor VF/nunits. */
/* Widening: each operand vector yields two result stmts, one built from
   CODE1/DECL1 and one from CODE2/DECL2, both linked into the
   RELATED_STMT chain.  */
3460 for (j = 0; j < ncopies; j++)
3463 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3465 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3467 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3469 /* Generate first half of the widened result: */
3471 = vect_gen_widened_results_half (code1, vectype_out, decl1,
3472 vec_oprnd0, vec_oprnd1,
3473 unary_op, vec_dest, bsi, stmt);
3475 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3477 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3478 prev_stmt_info = vinfo_for_stmt (new_stmt);
3480 /* Generate second half of the widened result: */
3482 = vect_gen_widened_results_half (code2, vectype_out, decl2,
3483 vec_oprnd0, vec_oprnd1,
3484 unary_op, vec_dest, bsi, stmt);
3485 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3486 prev_stmt_info = vinfo_for_stmt (new_stmt);
3491 /* In case the vectorization factor (VF) is bigger than the number
3492 of elements that we can fit in a vectype (nunits), we have to
3493 generate more than one vector stmt - i.e - we need to "unroll"
3494 the vector stmt by a factor VF/nunits. */
/* Narrowing: each result stmt combines two operand defs, VEC_OPRND0 and
   the stmt-copy def that follows it, with a single CODE1 expression.  */
3495 for (j = 0; j < ncopies; j++)
3500 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3501 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3505 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3506 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3509 /* Arguments are ready. Create the new vector stmt. */
3510 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3511 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3512 new_temp = make_ssa_name (vec_dest, new_stmt);
3513 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3514 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3517 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3519 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3521 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Hand the caller the stmt recorded in the stmt info's VEC_STMT.  */
3524 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3531 /* Function vectorizable_assignment.
3533 Check if STMT performs an assignment (copy) that can be vectorized.
3534 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3535 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3536 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* When VEC_STMT is null only the analysis part runs: STMT_VINFO_TYPE is
   set to assignment_vec_info_type and vect_model_simple_cost is called.
   Otherwise one vector copy is emitted per operand def returned by
   vect_get_vec_defs.  */
3539 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3545 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3546 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3547 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3550 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3551 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3552 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3554 VEC(tree,heap) *vec_oprnds = NULL;
3557 gcc_assert (ncopies >= 1);
/* NOTE(review): the condition guarding this early exit is not visible in
   this excerpt; presumably it rejects the multiple-copies case --
   confirm.  */
3559 return false; /* FORNOW */
3561 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3564 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3567 /* FORNOW: not yet supported. */
3568 if (STMT_VINFO_LIVE_P (stmt_info))
3570 if (vect_print_dump_info (REPORT_DETAILS))
3571 fprintf (vect_dump, "value used after loop.");
3575 /* Is vectorizable assignment? */
3576 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3579 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3580 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* The whole rhs is treated as the single operand of the copy.  */
3583 op = GIMPLE_STMT_OPERAND (stmt, 1);
3584 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
3586 if (vect_print_dump_info (REPORT_DETAILS))
3587 fprintf (vect_dump, "use not simple.");
3591 if (!vec_stmt) /* transformation not required. */
3593 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3594 if (vect_print_dump_info (REPORT_DETAILS))
3595 fprintf (vect_dump, "=== vectorizable_assignment ===");
3596 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3601 if (vect_print_dump_info (REPORT_DETAILS))
3602 fprintf (vect_dump, "transform assignment.");
3605 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3608 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3610 /* Arguments are ready. Create the new vector stmt. */
3611 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
/* Each iteration overwrites *VEC_STMT and the stmt info's VEC_STMT, so
   after the loop both refer to the last copy generated.  */
3613 *vec_stmt = build_gimple_modify_stmt (vec_dest, vop);
3614 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3615 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
3616 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3617 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
/* Record the generated stmt in the SLP node's vector-stmt list.  */
3620 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
/* Release the operand vector filled in by vect_get_vec_defs.  */
3623 VEC_free (tree, heap, vec_oprnds);
3628 /* Function vect_min_worthwhile_factor.
3630 For a loop where we could vectorize the operation indicated by CODE,
3631 return the minimum vectorization factor that makes it worthwhile
3632 to use generic vectors. */
/* vectorizable_operation compares LOOP_VINFO_VECT_FACTOR against the
   value returned here, both when the target has no insn for the
   operation and when the vectype's mode is not a vector mode.  */
3634 vect_min_worthwhile_factor (enum tree_code code)
3655 /* Function vectorizable_induction
3657 Check if PHI performs an induction computation that can be vectorized.
3658 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3659 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3660 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* BSI is marked ATTRIBUTE_UNUSED.  When VEC_STMT is null only the analysis
   part runs: STMT_VINFO_TYPE is set to induc_vec_info_type and
   vect_model_induction_cost is called.  */
3663 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3666 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3667 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3668 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3669 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3670 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3673 gcc_assert (ncopies >= 1);
3675 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3678 /* FORNOW: SLP not supported. */
3679 if (STMT_SLP_TYPE (stmt_info))
/* A relevant stmt that reaches this point must already be classified as
   an induction.  */
3682 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3684 if (STMT_VINFO_LIVE_P (stmt_info))
3686 /* FORNOW: not yet supported. */
3687 if (vect_print_dump_info (REPORT_DETAILS))
3688 fprintf (vect_dump, "value used after loop.");
3692 if (TREE_CODE (phi) != PHI_NODE)
3695 if (!vec_stmt) /* transformation not required. */
3697 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3698 if (vect_print_dump_info (REPORT_DETAILS))
3699 fprintf (vect_dump, "=== vectorizable_induction ===");
3700 vect_model_induction_cost (stmt_info, ncopies);
3706 if (vect_print_dump_info (REPORT_DETAILS))
3707 fprintf (vect_dump, "transform induction phi.");
/* The vectorized stmt handed back is whatever stmt defines the SSA name
   returned by get_initial_def_for_induction.  */
3709 vec_def = get_initial_def_for_induction (phi);
3710 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3715 /* Function vectorizable_operation.
3717 Check if STMT performs a binary or unary operation that can be vectorized.
3718 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3719 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3720 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* The input and output vector types must have the same element count.
   Target support is looked up through the optab for the tree code in the
   vectype's mode.  Shifts get extra checks because the insn's shift-amount
   operand may be a scalar.  When VEC_STMT is null only the analysis part
   runs: STMT_VINFO_TYPE is set to op_vec_info_type and
   vect_model_simple_cost is called.  */
3723 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3729 tree op0, op1 = NULL;
3730 tree vec_oprnd1 = NULL_TREE;
3731 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3732 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3733 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3734 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3735 enum tree_code code;
3736 enum machine_mode vec_mode;
3741 enum machine_mode optab_op2_mode;
3743 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3744 tree new_stmt = NULL_TREE;
3745 stmt_vec_info prev_stmt_info;
3746 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3749 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3751 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3754 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3755 this, so we can safely override NCOPIES with 1 here. */
3758 gcc_assert (ncopies >= 1);
3759 /* FORNOW. This restriction should be relaxed. */
3760 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3762 if (vect_print_dump_info (REPORT_DETAILS))
3763 fprintf (vect_dump, "multiple types in nested loop.");
3767 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3770 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3773 /* FORNOW: not yet supported. */
3774 if (STMT_VINFO_LIVE_P (stmt_info))
3776 if (vect_print_dump_info (REPORT_DETAILS))
3777 fprintf (vect_dump, "value used after loop.");
3781 /* Is STMT a vectorizable binary/unary operation? */
3782 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3785 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
/* Compare the element count of the result's vector type with that of the
   stmt's recorded vectype.  */
3788 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3789 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3790 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3791 if (nunits_out != nunits_in)
3794 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3795 code = TREE_CODE (operation);
3797 /* For pointer addition, we should use the normal plus for
3798 the vector addition. */
3799 if (code == POINTER_PLUS_EXPR)
3802 optab = optab_for_tree_code (code, vectype);
3804 /* Support only unary or binary operations. */
3805 op_type = TREE_OPERAND_LENGTH (operation);
3806 if (op_type != unary_op && op_type != binary_op)
3808 if (vect_print_dump_info (REPORT_DETAILS))
3809 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
/* Classify the first operand's definition into dt[0]; for a binary
   operation the second goes into dt[1].  */
3813 op0 = TREE_OPERAND (operation, 0);
3814 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3816 if (vect_print_dump_info (REPORT_DETAILS))
3817 fprintf (vect_dump, "use not simple.");
3821 if (op_type == binary_op)
3823 op1 = TREE_OPERAND (operation, 1);
3824 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3826 if (vect_print_dump_info (REPORT_DETAILS))
3827 fprintf (vect_dump, "use not simple.");
3832 /* Supportable by target? */
3835 if (vect_print_dump_info (REPORT_DETAILS))
3836 fprintf (vect_dump, "no optab.");
/* Look up the insn that implements the optab in the vectype's mode.  With
   no such insn, the visible test involves the mode being exactly one word
   wide and the vectorization factor reaching
   vect_min_worthwhile_factor (CODE); presumably failing it rejects the
   stmt, otherwise word mode is used -- the branch bodies are not visible
   here.  */
3839 vec_mode = TYPE_MODE (vectype);
3840 icode = (int) optab_handler (optab, vec_mode)->insn_code;
3841 if (icode == CODE_FOR_nothing)
3843 if (vect_print_dump_info (REPORT_DETAILS))
3844 fprintf (vect_dump, "op not supported by target.");
3845 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3846 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3847 < vect_min_worthwhile_factor (code))
3849 if (vect_print_dump_info (REPORT_DETAILS))
3850 fprintf (vect_dump, "proceeding using word mode.");
3853 /* Worthwhile without SIMD support? */
3854 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3855 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3856 < vect_min_worthwhile_factor (code))
3858 if (vect_print_dump_info (REPORT_DETAILS))
3859 fprintf (vect_dump, "not worthwhile without SIMD support.");
3863 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3865 /* FORNOW: not yet supported. */
3866 if (!VECTOR_MODE_P (vec_mode))
3869 /* Invariant argument is needed for a vector shift
3870 by a scalar shift operand. */
/* The shift amount is acceptable when the insn's operand 2 has a vector
   mode, or when op1 is a constant or a loop invariant.  */
3871 optab_op2_mode = insn_data[icode].operand[2].mode;
3872 if (! (VECTOR_MODE_P (optab_op2_mode)
3873 || dt[1] == vect_constant_def
3874 || dt[1] == vect_invariant_def))
3876 if (vect_print_dump_info (REPORT_DETAILS))
3877 fprintf (vect_dump, "operand mode requires invariant argument.");
3882 if (!vec_stmt) /* transformation not required. */
3884 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3885 if (vect_print_dump_info (REPORT_DETAILS))
3886 fprintf (vect_dump, "=== vectorizable_operation ===");
3887 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3893 if (vect_print_dump_info (REPORT_DETAILS))
3894 fprintf (vect_dump, "transform binary/unary operation.");
3897 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Operand vectors start with room for one element; the second one is only
   allocated for binary operations.  */
3900 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3901 if (op_type == binary_op)
3902 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3904 /* In case the vectorization factor (VF) is bigger than the number
3905 of elements that we can fit in a vectype (nunits), we have to generate
3906 more than one vector stmt - i.e - we need to "unroll" the
3907 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3908 from one copy of the vector stmt to the next, in the field
3909 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3910 stages to find the correct vector defs to be used when vectorizing
3911 stmts that use the defs of the current stmt. The example below illustrates
3912 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3913 4 vectorized stmts):
3915 before vectorization:
3916 RELATED_STMT VEC_STMT
3920 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3922 RELATED_STMT VEC_STMT
3923 VS1_0: vx0 = memref0 VS1_1 -
3924 VS1_1: vx1 = memref1 VS1_2 -
3925 VS1_2: vx2 = memref2 VS1_3 -
3926 VS1_3: vx3 = memref3 - -
3927 S1: x = load - VS1_0
3930 step2: vectorize stmt S2 (done here):
3931 To vectorize stmt S2 we first need to find the relevant vector
3932 def for the first operand 'x'. This is, as usual, obtained from
3933 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3934 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3935 relevant vector def 'vx0'. Having found 'vx0' we can generate
3936 the vector stmt VS2_0, and as usual, record it in the
3937 STMT_VINFO_VEC_STMT of stmt S2.
3938 When creating the second copy (VS2_1), we obtain the relevant vector
3939 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3940 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3941 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3942 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3943 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3944 chain of stmts and pointers:
3945 RELATED_STMT VEC_STMT
3946 VS1_0: vx0 = memref0 VS1_1 -
3947 VS1_1: vx1 = memref1 VS1_2 -
3948 VS1_2: vx2 = memref2 VS1_3 -
3949 VS1_3: vx3 = memref3 - -
3950 S1: x = load - VS1_0
3951 VS2_0: vz0 = vx0 + v1 VS2_1 -
3952 VS2_1: vz1 = vx1 + v1 VS2_2 -
3953 VS2_2: vz2 = vx2 + v1 VS2_3 -
3954 VS2_3: vz3 = vx3 + v1 - -
3955 S2: z = x + 1 - VS2_0 */
3957 prev_stmt_info = NULL;
3958 for (j = 0; j < ncopies; j++)
/* NOTE(review): the comment that opens just below (about shl/shr insn
   patterns) has no visible terminator before the statements that follow
   it.  As written it runs on until the end of the next comment and
   swallows those statements.  Confirm that its closing line is intact.  */
3963 if (op_type == binary_op
3964 && (code == LSHIFT_EXPR || code == RSHIFT_EXPR))
3966 /* Vector shl and shr insn patterns can be defined with scalar
3967 operand 2 (shift operand). In this case, use constant or loop
3968 invariant op1 directly, without extending it to vector mode
3970 optab_op2_mode = insn_data[icode].operand[2].mode;
3971 if (!VECTOR_MODE_P (optab_op2_mode))
3973 if (vect_print_dump_info (REPORT_DETAILS))
3974 fprintf (vect_dump, "operand 1 using scalar mode.");
3976 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3980 /* vec_oprnd is available if operand 1 should be of a scalar-type
3981 (a special case for certain kind of vector shifts); otherwise,
3982 operand 1 should be of a vector type (the usual case). */
3983 if (op_type == binary_op && !vec_oprnd1)
3984 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3987 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
3991 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3993 /* Arguments are ready. Create the new vector stmt. */
/* One vector stmt per entry of vec_oprnds0; for a binary operation the
   second operand is taken from the same index of vec_oprnds1.  */
3994 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3996 if (op_type == binary_op)
3998 vop1 = VEC_index (tree, vec_oprnds1, i);
3999 new_stmt = build_gimple_modify_stmt (vec_dest,
4000 build2 (code, vectype, vop0, vop1));
4003 new_stmt = build_gimple_modify_stmt (vec_dest,
4004 build1 (code, vectype, vop0));
4006 new_temp = make_ssa_name (vec_dest, new_stmt);
4007 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4008 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Record the generated stmt in the SLP node's vector-stmt list.  */
4010 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Chain the copies as described in the long comment above: VEC_STMT of
   the scalar stmt, then RELATED_STMT from each copy to the next.  */
4014 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4016 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4017 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Release both operand vectors.  */
4020 VEC_free (tree, heap, vec_oprnds0);
4022 VEC_free (tree, heap, vec_oprnds1);
4028 /* Function vectorizable_type_demotion
4030 Check if STMT performs a binary or unary operation that involves
4031 type demotion, and if it can be vectorized.
4032 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4033 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4034 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* The codes recognised are NOP_EXPR and CONVERT_EXPR, between two integral
   types or two scalar floating-point types.  The input vector type is
   compared against half the element count of the output vector type.
   Target support comes from supportable_narrowing_operation, which
   supplies CODE1.  When VEC_STMT is null only the analysis part runs:
   STMT_VINFO_TYPE is set to type_demotion_vec_info_type and
   vect_model_simple_cost is called.  */
4037 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
4044 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4045 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4046 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4047 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4048 enum tree_code code, code1 = ERROR_MARK;
4051 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4053 stmt_vec_info prev_stmt_info;
4062 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4065 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4068 /* FORNOW: not yet supported. */
4069 if (STMT_VINFO_LIVE_P (stmt_info))
4071 if (vect_print_dump_info (REPORT_DETAILS))
4072 fprintf (vect_dump, "value used after loop.");
4076 /* Is STMT a vectorizable type-demotion operation? */
4077 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4080 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4083 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4084 code = TREE_CODE (operation);
4085 if (code != NOP_EXPR && code != CONVERT_EXPR)
4088 op0 = TREE_OPERAND (operation, 0);
4089 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4092 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4094 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4095 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4098 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4099 if (nunits_in != nunits_out / 2) /* FORNOW */
/* The number of copies is counted in output vectors.  */
4102 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4103 gcc_assert (ncopies >= 1);
4104 /* FORNOW. This restriction should be relaxed. */
4105 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4107 if (vect_print_dump_info (REPORT_DETAILS))
4108 fprintf (vect_dump, "multiple types in nested loop.");
/* Both types must be integral, or both scalar floating-point.
   NOTE(review): the extra test of CODE in the floating-point arm repeats
   the NOP_EXPR/CONVERT_EXPR check made on CODE earlier; it looks
   redundant -- confirm.  */
4112 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4113 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4114 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4115 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4116 && (code == NOP_EXPR || code == CONVERT_EXPR))))
4119 /* Check the operands of the operation. */
4120 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4122 if (vect_print_dump_info (REPORT_DETAILS))
4123 fprintf (vect_dump, "use not simple.");
4127 /* Supportable by target? */
4128 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
/* From here on the vectype recorded for the stmt is the input vector
   type.  */
4131 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4133 if (!vec_stmt) /* transformation not required. */
4135 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4136 if (vect_print_dump_info (REPORT_DETAILS))
4137 fprintf (vect_dump, "=== vectorizable_demotion ===");
4138 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
4143 if (vect_print_dump_info (REPORT_DETAILS))
4144 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
4148 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4150 /* In case the vectorization factor (VF) is bigger than the number
4151 of elements that we can fit in a vectype (nunits), we have to generate
4152 more than one vector stmt - i.e - we need to "unroll" the
4153 vector stmt by a factor VF/nunits. */
4154 prev_stmt_info = NULL;
4155 for (j = 0; j < ncopies; j++)
/* Each result stmt combines two operand defs, VEC_OPRND0 and the
   stmt-copy def that follows it, with a single CODE1 expression.  */
4160 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4161 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4165 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
4166 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4169 /* Arguments are ready. Create the new vector stmt. */
4170 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
4171 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
4172 new_temp = make_ssa_name (vec_dest, new_stmt);
4173 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4174 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Copies are linked through STMT_VINFO_RELATED_STMT of the previously
   generated stmt.  */
4177 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4179 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4181 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Hand the caller the stmt recorded in the stmt info's VEC_STMT.  */
4184 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4189 /* Function vectorizable_type_promotion
4191 Check if STMT performs a binary or unary operation that involves
4192 type promotion, and if it can be vectorized.
4193 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4194 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4195 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4198 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
4204 tree op0, op1 = NULL;
4205 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4206 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4207 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4208 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4209 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4210 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4213 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4215 stmt_vec_info prev_stmt_info;
4223 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4226 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4229 /* FORNOW: not yet supported. */
4230 if (STMT_VINFO_LIVE_P (stmt_info))
4232 if (vect_print_dump_info (REPORT_DETAILS))
4233 fprintf (vect_dump, "value used after loop.");
4237 /* Is STMT a vectorizable type-promotion operation? */
4238 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4241 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4244 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4245 code = TREE_CODE (operation);
4246 if (code != NOP_EXPR && code != CONVERT_EXPR
4247 && code != WIDEN_MULT_EXPR)
4250 op0 = TREE_OPERAND (operation, 0);
4251 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4254 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4256 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4257 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4260 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4261 if (nunits_out != nunits_in / 2) /* FORNOW */
4264 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4265 gcc_assert (ncopies >= 1);
4266 /* FORNOW. This restriction should be relaxed. */
4267 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4269 if (vect_print_dump_info (REPORT_DETAILS))
4270 fprintf (vect_dump, "multiple types in nested loop.");
4274 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4275 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4276 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4277 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4278 && (code == CONVERT_EXPR || code == NOP_EXPR))))
4281 /* Check the operands of the operation. */
4282 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4284 if (vect_print_dump_info (REPORT_DETAILS))
4285 fprintf (vect_dump, "use not simple.");
4289 op_type = TREE_CODE_LENGTH (code);
4290 if (op_type == binary_op)
4292 op1 = TREE_OPERAND (operation, 1);
4293 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
4295 if (vect_print_dump_info (REPORT_DETAILS))
4296 fprintf (vect_dump, "use not simple.");
4301 /* Supportable by target? */
4302 if (!supportable_widening_operation (code, stmt, vectype_in,
4303 &decl1, &decl2, &code1, &code2))
4306 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4308 if (!vec_stmt) /* transformation not required. */
4310 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4311 if (vect_print_dump_info (REPORT_DETAILS))
4312 fprintf (vect_dump, "=== vectorizable_promotion ===");
4313 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
4319 if (vect_print_dump_info (REPORT_DETAILS))
4320 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
4324 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4326 /* In case the vectorization factor (VF) is bigger than the number
4327 of elements that we can fit in a vectype (nunits), we have to generate
4328 more than one vector stmt - i.e - we need to "unroll" the
4329 vector stmt by a factor VF/nunits. */
4331 prev_stmt_info = NULL;
4332 for (j = 0; j < ncopies; j++)
4337 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4338 if (op_type == binary_op)
4339 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
4343 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4344 if (op_type == binary_op)
4345 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
4348 /* Arguments are ready. Create the new vector stmt. We are creating
4349 two vector defs because the widened result does not fit in one vector.
4350 The vectorized stmt can be expressed as a call to a target builtin,
4351 or using a tree-code. */
4352 /* Generate first half of the widened result: */
4353 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
4354 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4356 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4358 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4359 prev_stmt_info = vinfo_for_stmt (new_stmt);
4361 /* Generate second half of the widened result: */
4362 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
4363 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4364 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4365 prev_stmt_info = vinfo_for_stmt (new_stmt);
4369 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4374 /* Function vect_strided_store_supported.
4376 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
4377 and FALSE otherwise. */
4380 vect_strided_store_supported (tree vectype)
4382 optab interleave_high_optab, interleave_low_optab;
4385 mode = (int) TYPE_MODE (vectype);
4387 /* Check that the operation is supported. */
4388 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
4390 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
4392 if (!interleave_high_optab || !interleave_low_optab)
4394 if (vect_print_dump_info (REPORT_DETAILS))
4395 fprintf (vect_dump, "no optab for interleave.");
4399 if (optab_handler (interleave_high_optab, mode)->insn_code
4401 || optab_handler (interleave_low_optab, mode)->insn_code
4402 == CODE_FOR_nothing)
4404 if (vect_print_dump_info (REPORT_DETAILS))
4405 fprintf (vect_dump, "interleave op not supported by target.");
4413 /* Function vect_permute_store_chain.
4415 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4416 a power of 2, generate interleave_high/low stmts to reorder the data
4417 correctly for the stores. Return the final references for stores in
4420 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4421 The input is 4 vectors each containing 8 elements. We assign a number to each
4422 element, the input sequence is:
4424 1st vec: 0 1 2 3 4 5 6 7
4425 2nd vec: 8 9 10 11 12 13 14 15
4426 3rd vec: 16 17 18 19 20 21 22 23
4427 4th vec: 24 25 26 27 28 29 30 31
4429 The output sequence should be:
4431 1st vec: 0 8 16 24 1 9 17 25
4432 2nd vec: 2 10 18 26 3 11 19 27
4433 3rd vec: 4 12 20 28 5 13 21 30
4434 4th vec: 6 14 22 30 7 15 23 31
4436 i.e., we interleave the contents of the four vectors in their order.
4438 We use interleave_high/low instructions to create such output. The input of
4439 each interleave_high/low operation is two vectors:
4442 the even elements of the result vector are obtained left-to-right from the
4443 high/low elements of the first vector. The odd elements of the result are
4444 obtained left-to-right from the high/low elements of the second vector.
4445 The output of interleave_high will be: 0 4 1 5
4446 and of interleave_low: 2 6 3 7
4449 The permutation is done in log LENGTH stages. In each stage interleave_high
4450 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4451 where the first argument is taken from the first half of DR_CHAIN and the
4452 second argument from it's second half.
4455 I1: interleave_high (1st vec, 3rd vec)
4456 I2: interleave_low (1st vec, 3rd vec)
4457 I3: interleave_high (2nd vec, 4th vec)
4458 I4: interleave_low (2nd vec, 4th vec)
4460 The output for the first stage is:
4462 I1: 0 16 1 17 2 18 3 19
4463 I2: 4 20 5 21 6 22 7 23
4464 I3: 8 24 9 25 10 26 11 27
4465 I4: 12 28 13 29 14 30 15 31
4467 The output of the second stage, i.e. the final result is:
4469 I1: 0 8 16 24 1 9 17 25
4470 I2: 2 10 18 26 3 11 19 27
4471 I3: 4 12 20 28 5 13 21 30
4472 I4: 6 14 22 30 7 15 23 31. */
4475 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4476 unsigned int length,
4478 block_stmt_iterator *bsi,
4479 VEC(tree,heap) **result_chain)
4481 tree perm_dest, perm_stmt, vect1, vect2, high, low;
4482 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4483 tree scalar_dest, tmp;
4486 VEC(tree,heap) *first, *second;
4488 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4489 first = VEC_alloc (tree, heap, length/2);
4490 second = VEC_alloc (tree, heap, length/2);
4492 /* Check that the operation is supported. */
4493 if (!vect_strided_store_supported (vectype))
4496 *result_chain = VEC_copy (tree, heap, dr_chain);
4498 for (i = 0; i < exact_log2 (length); i++)
4500 for (j = 0; j < length/2; j++)
4502 vect1 = VEC_index (tree, dr_chain, j);
4503 vect2 = VEC_index (tree, dr_chain, j+length/2);
4505 /* Create interleaving stmt:
4506 in the case of big endian:
4507 high = interleave_high (vect1, vect2)
4508 and in the case of little endian:
4509 high = interleave_low (vect1, vect2). */
4510 perm_dest = create_tmp_var (vectype, "vect_inter_high");
4511 DECL_GIMPLE_REG_P (perm_dest) = 1;
4512 add_referenced_var (perm_dest);
4513 if (BYTES_BIG_ENDIAN)
4514 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4516 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4517 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4518 high = make_ssa_name (perm_dest, perm_stmt);
4519 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
4520 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4521 VEC_replace (tree, *result_chain, 2*j, high);
4523 /* Create interleaving stmt:
4524 in the case of big endian:
4525 low = interleave_low (vect1, vect2)
4526 and in the case of little endian:
4527 low = interleave_high (vect1, vect2). */
4528 perm_dest = create_tmp_var (vectype, "vect_inter_low");
4529 DECL_GIMPLE_REG_P (perm_dest) = 1;
4530 add_referenced_var (perm_dest);
4531 if (BYTES_BIG_ENDIAN)
4532 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4534 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4535 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4536 low = make_ssa_name (perm_dest, perm_stmt);
4537 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
4538 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4539 VEC_replace (tree, *result_chain, 2*j+1, low);
4541 dr_chain = VEC_copy (tree, heap, *result_chain);
4547 /* Function vectorizable_store.
4549 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4551 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4552 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4553 Return FALSE if not a vectorizable STMT, TRUE otherwise.

   When VEC_STMT is NULL no code is generated: the stmt is only recorded as
   store_vec_info_type and, unless it is a pure SLP stmt, its cost is
   modelled.  A non-NULL SLP_NODE selects SLP vectorization (see the SLP
   flag below).  For a strided (interleaved) store the stmts of the whole
   group are vectorized together when the last stmt of the group is
   reached; see the DR_GROUP_STORE_COUNT check in the body.  */
4556 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
4562 tree vec_oprnd = NULL_TREE;
4563 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4564 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4565 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4566 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4567 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4568 enum machine_mode vec_mode;
4570 enum dr_alignment_support alignment_support_scheme;
4572 enum vect_def_type dt;
4573 stmt_vec_info prev_stmt_info = NULL;
4574 tree dataref_ptr = NULL_TREE;
4575 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4576 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4578 tree next_stmt, first_stmt;
4579 bool strided_store = false;
4580 unsigned int group_size, i;
4581 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
4583 VEC(tree,heap) *vec_oprnds = NULL;
4584 bool slp = (slp_node != NULL);
4585 stmt_vec_info first_stmt_vinfo;
4586 unsigned int vec_num;
4588 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
4589 this, so we can safely override NCOPIES with 1 here. */
4593 gcc_assert (ncopies >= 1);
4595 /* FORNOW. This restriction should be relaxed. */
4596 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4598 if (vect_print_dump_info (REPORT_DETAILS))
4599 fprintf (vect_dump, "multiple types in nested loop.");
4603 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4606 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4609 if (STMT_VINFO_LIVE_P (stmt_info))
4611 if (vect_print_dump_info (REPORT_DETAILS))
4612 fprintf (vect_dump, "value used after loop.");
4616 /* Is vectorizable store? */
4618 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4621 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4622 if (TREE_CODE (scalar_dest) != ARRAY_REF
4623 && TREE_CODE (scalar_dest) != INDIRECT_REF
4624 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
4627 op = GIMPLE_STMT_OPERAND (stmt, 1);
4628 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4630 if (vect_print_dump_info (REPORT_DETAILS))
4631 fprintf (vect_dump, "use not simple.");
4635 vec_mode = TYPE_MODE (vectype);
4636 /* FORNOW. In some cases can vectorize even if data-type not supported
4637 (e.g. - array initialization with 0). */
4638 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
4641 if (!STMT_VINFO_DATA_REF (stmt_info))
4644 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4646 strided_store = true;
4647 if (!vect_strided_store_supported (vectype)
4648 && !PURE_SLP_STMT (stmt_info) && !slp)
4652 if (!vec_stmt) /* transformation not required. */
4654 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4655 if (!PURE_SLP_STMT (stmt_info))
4656 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
4664 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4665 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4666 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4668 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4671 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
4673 /* We vectorize all the stmts of the interleaving group when we
4674 reach the last stmt in the group. */
4675 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4676 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
4679 *vec_stmt = NULL_TREE;
4684 strided_store = false;
4686 /* VEC_NUM is the number of vect stmts to be created for this group. */
4687 if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
4688 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4690 vec_num = group_size;
4696 group_size = vec_num = 1;
4697 first_stmt_vinfo = stmt_info;
4700 if (vect_print_dump_info (REPORT_DETAILS))
4701 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
4703 dr_chain = VEC_alloc (tree, heap, group_size);
4704 oprnds = VEC_alloc (tree, heap, group_size);
4706 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4707 gcc_assert (alignment_support_scheme);
4708 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
4710 /* In case the vectorization factor (VF) is bigger than the number
4711 of elements that we can fit in a vectype (nunits), we have to generate
4712 more than one vector stmt - i.e - we need to "unroll" the
4713 vector stmt by a factor VF/nunits. For more details see documentation in
4714 vect_get_vec_def_for_stmt_copy. */
4716 /* In case of interleaving (non-unit strided access):
4723 We create vectorized stores starting from base address (the access of the
4724 first stmt in the chain (S2 in the above example), when the last store stmt
4725 of the chain (S4) is reached:
4728 VS2: &base + vec_size*1 = vx0
4729 VS3: &base + vec_size*2 = vx1
4730 VS4: &base + vec_size*3 = vx3
4732 Then permutation statements are generated:
4734 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
4735 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
4738 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4739 (the order of the data-refs in the output of vect_permute_store_chain
4740 corresponds to the order of scalar stmts in the interleaving chain - see
4741 the documentation of vect_permute_store_chain()).
4743 In case of both multiple types and interleaving, above vector stores and
4744 permutation stmts are created for every copy. The result vector stmts are
4745 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4746 STMT_VINFO_RELATED_STMT for the next copies.
4749 prev_stmt_info = NULL;
4750 for (j = 0; j < ncopies; j++)
4759 /* Get vectorized arguments for SLP_NODE. */
4760 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
4762 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4766 /* For interleaved stores we collect vectorized defs for all the
4767 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4768 used as an input to vect_permute_store_chain(), and OPRNDS as
4769 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4771 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4772 OPRNDS are of size 1. */
4773 next_stmt = first_stmt;
4774 for (i = 0; i < group_size; i++)
4776 /* Since gaps are not supported for interleaved stores,
4777 GROUP_SIZE is the exact number of stmts in the chain.
4778 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4779 there is no interleaving, GROUP_SIZE is 1, and only one
4780 iteration of the loop will be executed. */
4781 gcc_assert (next_stmt);
4782 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4784 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4786 VEC_quick_push(tree, dr_chain, vec_oprnd);
4787 VEC_quick_push(tree, oprnds, vec_oprnd);
4788 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4791 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
4792 &dummy, &ptr_incr, false,
4793 TREE_TYPE (vec_oprnd), &inv_p);
4794 gcc_assert (!inv_p);
4798 /* FORNOW SLP doesn't work for multiple types. */
4801 /* For interleaved stores we created vectorized defs for all the
4802 defs stored in OPRNDS in the previous iteration (previous copy).
4803 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4804 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4806 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4807 OPRNDS are of size 1. */
4808 for (i = 0; i < group_size; i++)
4810 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
4811 VEC_index (tree, oprnds, i));
4812 VEC_replace(tree, dr_chain, i, vec_oprnd);
4813 VEC_replace(tree, oprnds, i, vec_oprnd);
4816 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
4821 result_chain = VEC_alloc (tree, heap, group_size);
4823 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4828 next_stmt = first_stmt;
4829 for (i = 0; i < vec_num; i++)
4832 /* Bump the vector pointer. */
4833 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
4837 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4838 else if (strided_store)
4839 /* For strided stores vectorized defs are interleaved in
4840 vect_permute_store_chain(). */
4841 vec_oprnd = VEC_index (tree, result_chain, i);
4843 data_ref = build_fold_indirect_ref (dataref_ptr);
4844 /* Arguments are ready. Create the new vector stmt. */
4845 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4846 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4847 mark_symbols_for_renaming (new_stmt);
4850 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4852 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4854 prev_stmt_info = vinfo_for_stmt (new_stmt);
4855 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4865 /* Function vect_setup_realignment
4867 This function is called when vectorizing an unaligned load using
4868 the dr_explicit_realign[_optimized] scheme.
4869 This function generates the following code at the loop prolog:
4872 x msq_init = *(floor(p)); # prolog load
4873 realignment_token = call target_builtin;
4875 x msq = phi (msq_init, ---)
4877 The stmts marked with x are generated only for the case of
4878 dr_explicit_realign_optimized.
4880 The code above sets up a new (vector) pointer, pointing to the first
4881 location accessed by STMT, and a "floor-aligned" load using that pointer.
4882 It also generates code to compute the "realignment-token" (if the relevant
4883 target hook was defined), and creates a phi-node at the loop-header bb
4884 whose arguments are the result of the prolog-load (created by this
4885 function) and the result of a load that takes place in the loop (to be
4886 created by the caller to this function).
4888 For the case of dr_explicit_realign_optimized:
4889 The caller to this function uses the phi-result (msq) to create the
4890 realignment code inside the loop, and sets up the missing phi argument,
4893 msq = phi (msq_init, lsq)
4894 lsq = *(floor(p')); # load in loop
4895 result = realign_load (msq, lsq, realignment_token);
4897 For the case of dr_explicit_realign:
4899 msq = *(floor(p)); # load in loop
4901 lsq = *(floor(p')); # load in loop
4902 result = realign_load (msq, lsq, realignment_token);
4905 STMT - (scalar) load stmt to be vectorized. This load accesses
4906 a memory location that may be unaligned.
4907 BSI - place where new code is to be inserted.
4908 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4912 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4913 target hook, if defined.
4914 Return value - the result of the loop-header phi node.

   Parameters not described above:
   INIT_ADDR - if not NULL_TREE, an address the caller has already computed
   inside the loop.  Passing it puts this function in COMPUTE_IN_LOOP mode
   (step 1 in the body), which is asserted to be used only with
   dr_explicit_realign.
   AT_LOOP - output.  Set to the loop in whose preheader the initial vector
   load belongs: LOOP itself, or LOOP->inner when STMT is nested in LOOP and
   its data-ref step in LOOP is not zero (step 2 in the body).  */
4917 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4918 tree *realignment_token,
4919 enum dr_alignment_support alignment_support_scheme,
4921 struct loop **at_loop)
4923 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4924 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4925 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4926 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4928 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4935 tree msq_init = NULL_TREE;
4938 tree msq = NULL_TREE;
4939 tree stmts = NULL_TREE;
4941 bool compute_in_loop = false;
4942 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4943 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
4944 struct loop *loop_for_initial_load;
4946 gcc_assert (alignment_support_scheme == dr_explicit_realign
4947 || alignment_support_scheme == dr_explicit_realign_optimized);
4949 /* We need to generate three things:
4950 1. the misalignment computation
4951 2. the extra vector load (for the optimized realignment scheme).
4952 3. the phi node for the two vectors from which the realignment is
4953 done (for the optimized realignment scheme).
4956 /* 1. Determine where to generate the misalignment computation.
4958 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4959 calculation will be generated by this function, outside the loop (in the
4960 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4961 caller, inside the loop.
4963 Background: If the misalignment remains fixed throughout the iterations of
4964 the loop, then both realignment schemes are applicable, and also the
4965 misalignment computation can be done outside LOOP. This is because we are
4966 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4967 are a multiple of VS (the Vector Size), and therefore the misalignment in
4968 different vectorized LOOP iterations is always the same.
4969 The problem arises only if the memory access is in an inner-loop nested
4970 inside LOOP, which is now being vectorized using outer-loop vectorization.
4971 This is the only case when the misalignment of the memory access may not
4972 remain fixed throughout the iterations of the inner-loop (as explained in
4973 detail in vect_supportable_dr_alignment). In this case, not only is the
4974 optimized realignment scheme not applicable, but also the misalignment
4975 computation (and generation of the realignment token that is passed to
4976 REALIGN_LOAD) have to be done inside the loop.
4978 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4979 or not, which in turn determines if the misalignment is computed inside
4980 the inner-loop, or outside LOOP. */
4982 if (init_addr != NULL_TREE)
4984 compute_in_loop = true;
4985 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4989 /* 2. Determine where to generate the extra vector load.
4991 For the optimized realignment scheme, instead of generating two vector
4992 loads in each iteration, we generate a single extra vector load in the
4993 preheader of the loop, and in each iteration reuse the result of the
4994 vector load from the previous iteration. In case the memory access is in
4995 an inner-loop nested inside LOOP, which is now being vectorized using
4996 outer-loop vectorization, we need to determine whether this initial vector
4997 load should be generated at the preheader of the inner-loop, or can be
4998 generated at the preheader of LOOP. If the memory access has no evolution
4999 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5000 to be generated inside LOOP (in the preheader of the inner-loop). */
5002 if (nested_in_vect_loop)
5004 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
5005 bool invariant_in_outerloop =
5006 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
5007 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5010 loop_for_initial_load = loop;
5012 *at_loop = loop_for_initial_load;
5014 /* 3. For the case of the optimized realignment, create the first vector
5015 load at the loop preheader. */
5017 if (alignment_support_scheme == dr_explicit_realign_optimized)
5019 /* Create msq_init = *(floor(p1)) in the loop preheader */
5021 gcc_assert (!compute_in_loop);
5022 pe = loop_preheader_edge (loop_for_initial_load);
5023 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5024 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
5025 &init_addr, &inc, true, NULL_TREE, &inv_p);
5026 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5027 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5028 new_temp = make_ssa_name (vec_dest, new_stmt);
5029 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5030 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5031 gcc_assert (!new_bb);
5032 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
5035 /* 4. Create realignment token using a target builtin, if available.
5036 It is done either inside the containing loop, or before LOOP (as
5037 determined above). */
5039 if (targetm.vectorize.builtin_mask_for_load)
5043 /* Compute INIT_ADDR - the initial address accessed by this memref. */
5044 if (compute_in_loop)
5045 gcc_assert (init_addr); /* already computed by the caller. */
5048 /* Generate the INIT_ADDR computation outside LOOP. */
5049 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5051 pe = loop_preheader_edge (loop);
5052 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
5053 gcc_assert (!new_bb);
5056 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5057 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
5058 vec_dest = vect_create_destination_var (scalar_dest,
5059 TREE_TYPE (new_stmt));
5060 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5061 new_temp = make_ssa_name (vec_dest, new_stmt);
5062 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5064 if (compute_in_loop)
5065 bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
5068 /* Generate the misalignment computation outside LOOP. */
5069 pe = loop_preheader_edge (loop);
5070 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5071 gcc_assert (!new_bb);
5074 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
5076 /* The result of the CALL_EXPR to this builtin is determined from
5077 the value of the parameter and no global variables are touched
5078 which makes the builtin a "const" function. Requiring the
5079 builtin to have the "const" attribute makes it unnecessary
5080 to call mark_call_clobbered. */
5081 gcc_assert (TREE_READONLY (builtin_decl));
5084 if (alignment_support_scheme == dr_explicit_realign)
5087 gcc_assert (!compute_in_loop);
5088 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5091 /* 5. Create msq = phi <msq_init, lsq> in loop */
5093 pe = loop_preheader_edge (containing_loop);
5094 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5095 msq = make_ssa_name (vec_dest, NULL_TREE);
5096 phi_stmt = create_phi_node (msq, containing_loop->header);
5097 SSA_NAME_DEF_STMT (msq) = phi_stmt;
5098 add_phi_arg (phi_stmt, msq_init, pe);
5104 /* Function vect_strided_load_supported.
5106 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5107 and FALSE otherwise. */
5110 vect_strided_load_supported (tree vectype)
5112 optab perm_even_optab, perm_odd_optab;
5115 mode = (int) TYPE_MODE (vectype);
5117 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
5118 if (!perm_even_optab)
5120 if (vect_print_dump_info (REPORT_DETAILS))
5121 fprintf (vect_dump, "no optab for perm_even.");
5125 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
5127 if (vect_print_dump_info (REPORT_DETAILS))
5128 fprintf (vect_dump, "perm_even op not supported by target.");
5132 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
5133 if (!perm_odd_optab)
5135 if (vect_print_dump_info (REPORT_DETAILS))
5136 fprintf (vect_dump, "no optab for perm_odd.");
5140 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
5142 if (vect_print_dump_info (REPORT_DETAILS))
5143 fprintf (vect_dump, "perm_odd op not supported by target.");
5150 /* Function vect_permute_load_chain.
5152 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5153 a power of 2, generate extract_even/odd stmts to reorder the input data
5154 correctly. Return the final references for loads in RESULT_CHAIN.
5156 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5157 The input is 4 vectors each containing 8 elements. We assign a number to each
5158 element, the input sequence is:
5160 1st vec: 0 1 2 3 4 5 6 7
5161 2nd vec: 8 9 10 11 12 13 14 15
5162 3rd vec: 16 17 18 19 20 21 22 23
5163 4th vec: 24 25 26 27 28 29 30 31
5165 The output sequence should be:
5167 1st vec: 0 4 8 12 16 20 24 28
5168 2nd vec: 1 5 9 13 17 21 25 29
5169 3rd vec: 2 6 10 14 18 22 26 30
5170 4th vec: 3 7 11 15 19 23 27 31
5172 i.e., the first output vector should contain the first elements of each
5173 interleaving group, etc.
5175 We use extract_even/odd instructions to create such output. The input of each
5176 extract_even/odd operation is two vectors
5180 and the output is the vector of extracted even/odd elements. The output of
5181 extract_even will be: 0 2 4 6
5182 and of extract_odd: 1 3 5 7
5185 The permutation is done in log LENGTH stages. In each stage extract_even and
5186 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5187 order. In our example,
5189 E1: extract_even (1st vec, 2nd vec)
5190 E2: extract_odd (1st vec, 2nd vec)
5191 E3: extract_even (3rd vec, 4th vec)
5192 E4: extract_odd (3rd vec, 4th vec)
5194 The output for the first stage will be:
5196 E1: 0 2 4 6 8 10 12 14
5197 E2: 1 3 5 7 9 11 13 15
5198 E3: 16 18 20 22 24 26 28 30
5199 E4: 17 19 21 23 25 27 29 31
5201 In order to proceed and create the correct sequence for the next stage (or
5202 for the correct output, if the second stage is the last one, as in our
5203 example), we first put the output of extract_even operation and then the
5204 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5205 The input for the second stage is:
5207 1st vec (E1): 0 2 4 6 8 10 12 14
5208 2nd vec (E3): 16 18 20 22 24 26 28 30
5209 3rd vec (E2): 1 3 5 7 9 11 13 15
5210 4th vec (E4): 17 19 21 23 25 27 29 31
5212 The output of the second stage:
5214 E1: 0 4 8 12 16 20 24 28
5215 E2: 2 6 10 14 18 22 26 30
5216 E3: 1 5 9 13 17 21 25 29
5217 E4: 3 7 11 15 19 23 27 31
5219 And RESULT_CHAIN after reordering:
5221 1st vec (E1): 0 4 8 12 16 20 24 28
5222 2nd vec (E3): 1 5 9 13 17 21 25 29
5223 3rd vec (E2): 2 6 10 14 18 22 26 30
5224 4th vec (E4): 3 7 11 15 19 23 27 31. */
5227 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
5228 unsigned int length,
5230 block_stmt_iterator *bsi,
5231 VEC(tree,heap) **result_chain)
5233 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
5234 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5239 /* Check that the operation is supported. */
5240 if (!vect_strided_load_supported (vectype))
5243 *result_chain = VEC_copy (tree, heap, dr_chain);
5244 for (i = 0; i < exact_log2 (length); i++)
5246 for (j = 0; j < length; j +=2)
5248 first_vect = VEC_index (tree, dr_chain, j);
5249 second_vect = VEC_index (tree, dr_chain, j+1);
5251 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5252 perm_dest = create_tmp_var (vectype, "vect_perm_even");
5253 DECL_GIMPLE_REG_P (perm_dest) = 1;
5254 add_referenced_var (perm_dest);
5256 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
5257 first_vect, second_vect);
5258 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5260 data_ref = make_ssa_name (perm_dest, perm_stmt);
5261 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5262 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5263 mark_symbols_for_renaming (perm_stmt);
5265 VEC_replace (tree, *result_chain, j/2, data_ref);
5267 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5268 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
5269 DECL_GIMPLE_REG_P (perm_dest) = 1;
5270 add_referenced_var (perm_dest);
5272 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
5273 first_vect, second_vect);
5274 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5275 data_ref = make_ssa_name (perm_dest, perm_stmt);
5276 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5277 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5278 mark_symbols_for_renaming (perm_stmt);
5280 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
5282 dr_chain = VEC_copy (tree, heap, *result_chain);
5288 /* Function vect_transform_strided_load.
5290 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5291 to perform their permutation and ascribe the result vectorized statements to
5292 the scalar statements.
/* Permute the vector loads collected in DR_CHAIN (SIZE entries) by calling
   vect_permute_load_chain, then attach the defining statement of each
   permuted vector to the scalar statements of the interleaving group that
   STMT belongs to.

   The group is walked from DR_GROUP_FIRST_DR of STMT through
   DR_GROUP_NEXT_DR, in step with the entries of the permuted chain.  A
   scalar statement without a STMT_VINFO_VEC_STMT receives the new statement
   directly; otherwise the STMT_VINFO_RELATED_STMT chain that starts at its
   existing vector statement is extended with the new statement.  Entries
   are compared against DR_GROUP_GAP so that loads standing for gaps can be
   skipped, and DR_GROUP_SAME_DR_STMT is consulted when moving on to the next
   scalar statement.  New statements are inserted at BSI.

   NOTE(review): the return type, the declaration of TMP_DATA_REF, the
   braces, the gap-counting updates and the return statements are not
   visible in this listing - confirm against the complete file.  */
5296 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
5297 block_stmt_iterator *bsi)
5299 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5300 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5301 tree next_stmt, new_stmt;
5302 VEC(tree,heap) *result_chain = NULL;
5303 unsigned int i, gap_count;
5306 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5307 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5308 vectors, that are ready for vector computation. */
5309 result_chain = VEC_alloc (tree, heap, size);
5311 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
5314 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5315 Since we scan the chain starting from its first node, their order
5316 corresponds to the order of data-refs in RESULT_CHAIN. */
5317 next_stmt = first_stmt;
5319 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
5324 /* Skip the gaps. Loads created for the gaps will be removed by dead
5325 code elimination pass later.
5326 DR_GROUP_GAP is the number of steps in elements from the previous
5327 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5328 correspond to the gaps.
5330 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
5338 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5339 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5340 copies, and we put the new vector statement in the first available
5342 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5343 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5346 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5347 tree rel_stmt = STMT_VINFO_RELATED_STMT (
5348 vinfo_for_stmt (prev_stmt));
5351 prev_stmt = rel_stmt;
5352 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5354 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
5356 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5358 /* If NEXT_STMT accesses the same DR as the previous statement,
5359 put the same TMP_DATA_REF as its vectorized statement; otherwise
5360 get the next data-ref from RESULT_CHAIN. */
5361 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5369 /* vectorizable_load.
5371 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5373 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5374 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5375 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Overview of vectorizable_load as far as this listing shows it.

   Analysis part.  The statement is tested for: a nested load that would
   need more than one copy, STMT_VINFO_RELEVANT_P, a def type other than
   vect_loop_def, STMT_VINFO_LIVE_P, being a GIMPLE_MODIFY_STMT with an
   SSA_NAME on the left-hand side, a right-hand side that is an ARRAY_REF,
   an INDIRECT_REF or a strided access, the presence of a data reference,
   a mov_optab handler for the vector mode, and, for strided accesses,
   vect_strided_load_supported (unless the statement is handled by SLP).
   When VEC_STMT is NULL only the statement type (load_vec_info_type) and
   the cost (vect_model_load_cost) are recorded.

   Transformation part.  For a strided access the group is described by
   DR_GROUP_FIRST_DR and DR_GROUP_SIZE and the loads are collected in
   DR_CHAIN; VEC_NUM is the number of vector statements per copy.  The
   realignment setup (vect_setup_realignment) is created ahead of the copy
   loop unless COMPUTE_IN_LOOP is set.  Then, for each of the NCOPIES copies
   and each of the VEC_NUM vectors, the code:
     1. creates or bumps the vector pointer (vect_create_data_ref_ptr,
        bump_vector_ptr);
     2. builds the memory reference selected by ALIGNMENT_SUPPORT_SCHEME and
        emits the load;
     3. for the two explicit realignment schemes, emits a REALIGN_LOAD_EXPR
        of MSQ, LSQ and the realignment token, and for the optimized scheme
        adds LSQ as a phi argument on the latch edge after the last load;
     4. for the invariant-load case, extracts a scalar with BIT_FIELD_REF and
        rebuilds a vector from NUNITS copies of it;
     5. pushes the result on DR_CHAIN and/or the SLP node.
   After a copy, strided groups are permuted by vect_transform_strided_load;
   otherwise the copies are linked through STMT_VINFO_RELATED_STMT.

   NOTE(review): the return type, the SLP_NODE parameter, several
   declarations, the braces, the conditions guarding many statements, some
   case labels and the return statements are not visible in this listing;
   the summary above relies on the comments and statements that are visible
   and must be confirmed against the complete file.  */
5378 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
5382 tree vec_dest = NULL;
5383 tree data_ref = NULL;
5385 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5386 stmt_vec_info prev_stmt_info;
5387 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5388 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5389 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
5390 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5391 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
5392 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5395 tree new_stmt = NULL_TREE;
5397 enum dr_alignment_support alignment_support_scheme;
5398 tree dataref_ptr = NULL_TREE;
5400 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5401 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5402 int i, j, group_size;
5403 tree msq = NULL_TREE, lsq;
5404 tree offset = NULL_TREE;
5405 tree realignment_token = NULL_TREE;
5406 tree phi = NULL_TREE;
5407 VEC(tree,heap) *dr_chain = NULL;
5408 bool strided_load = false;
5412 bool compute_in_loop = false;
5413 struct loop *at_loop;
5415 bool slp = (slp_node != NULL);
5417 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
5418 this, so we can safely override NCOPIES with 1 here. */
5422 gcc_assert (ncopies >= 1);
5424 /* FORNOW. This restriction should be relaxed. */
5425 if (nested_in_vect_loop && ncopies > 1)
5427 if (vect_print_dump_info (REPORT_DETAILS))
5428 fprintf (vect_dump, "multiple types in nested loop.");
5432 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5435 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5438 /* FORNOW: not yet supported. */
5439 if (STMT_VINFO_LIVE_P (stmt_info))
5441 if (vect_print_dump_info (REPORT_DETAILS))
5442 fprintf (vect_dump, "value used after loop.");
5446 /* Is vectorizable load? */
5447 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5450 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
5451 if (TREE_CODE (scalar_dest) != SSA_NAME)
5454 op = GIMPLE_STMT_OPERAND (stmt, 1);
5455 if (TREE_CODE (op) != ARRAY_REF
5456 && TREE_CODE (op) != INDIRECT_REF
5457 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
5460 if (!STMT_VINFO_DATA_REF (stmt_info))
5463 scalar_type = TREE_TYPE (DR_REF (dr));
5464 mode = (int) TYPE_MODE (vectype);
5466 /* FORNOW. In some cases can vectorize even if data-type not supported
5467 (e.g. - data copies). */
5468 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
5470 if (vect_print_dump_info (REPORT_DETAILS))
5471 fprintf (vect_dump, "Aligned load, but unsupported type.");
5475 /* Check if the load is a part of an interleaving chain. */
5476 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5478 strided_load = true;
5480 gcc_assert (! nested_in_vect_loop);
5482 /* Check if interleaving is supported. */
5483 if (!vect_strided_load_supported (vectype)
5484 && !PURE_SLP_STMT (stmt_info) && !slp)
5488 if (!vec_stmt) /* transformation not required. */
5490 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5491 vect_model_load_cost (stmt_info, ncopies, NULL);
5495 if (vect_print_dump_info (REPORT_DETAILS))
5496 fprintf (vect_dump, "transform load.");
5502 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5503 /* Check if the chain of loads is already vectorized. */
5504 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
5506 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5509 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5510 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
5511 dr_chain = VEC_alloc (tree, heap, group_size);
5513 /* VEC_NUM is the number of vect stmts to be created for this group. */
5516 strided_load = false;
5517 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5520 vec_num = group_size;
5526 group_size = vec_num = 1;
5529 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
5530 gcc_assert (alignment_support_scheme);
5532 /* In case the vectorization factor (VF) is bigger than the number
5533 of elements that we can fit in a vectype (nunits), we have to generate
5534 more than one vector stmt - i.e - we need to "unroll" the
5535 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5536 from one copy of the vector stmt to the next, in the field
5537 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5538 stages to find the correct vector defs to be used when vectorizing
5539 stmts that use the defs of the current stmt. The example below illustrates
5540 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
5541 4 vectorized stmts):
5543 before vectorization:
5544 RELATED_STMT VEC_STMT
5548 step 1: vectorize stmt S1:
5549 We first create the vector stmt VS1_0, and, as usual, record a
5550 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
5551 Next, we create the vector stmt VS1_1, and record a pointer to
5552 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
5553 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
5555 RELATED_STMT VEC_STMT
5556 VS1_0: vx0 = memref0 VS1_1 -
5557 VS1_1: vx1 = memref1 VS1_2 -
5558 VS1_2: vx2 = memref2 VS1_3 -
5559 VS1_3: vx3 = memref3 - -
5560 S1: x = load - VS1_0
5563 See in documentation in vect_get_vec_def_for_stmt_copy for how the
5564 information we recorded in RELATED_STMT field is used to vectorize
5567 /* In case of interleaving (non-unit strided access):
5574 Vectorized loads are created in the order of memory accesses
5575 starting from the access of the first stmt of the chain:
5578 VS2: vx1 = &base + vec_size*1
5579 VS3: vx3 = &base + vec_size*2
5580 VS4: vx4 = &base + vec_size*3
5582 Then permutation statements are generated:
5584 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
5585 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
5588 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5589 (the order of the data-refs in the output of vect_permute_load_chain
5590 corresponds to the order of scalar stmts in the interleaving chain - see
5591 the documentation of vect_permute_load_chain()).
5592 The generation of permutation stmts and recording them in
5593 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
5595 In case of both multiple types and interleaving, the vector loads and
5596 permutation stmts above are created for every copy. The result vector stmts
5597 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5598 STMT_VINFO_RELATED_STMT for the next copies. */
5600 /* If the data reference is aligned (dr_aligned) or potentially unaligned
5601 on a target that supports unaligned accesses (dr_unaligned_supported)
5602 we generate the following code:
5606 p = p + indx * vectype_size;
5611 Otherwise, the data reference is potentially unaligned on a target that
5612 does not support unaligned accesses (dr_explicit_realign_optimized) -
5613 then generate the following code, in which the data in each iteration is
5614 obtained by two vector loads, one from the previous iteration, and one
5615 from the current iteration:
5617 msq_init = *(floor(p1))
5618 p2 = initial_addr + VS - 1;
5619 realignment_token = call target_builtin;
5622 p2 = p2 + indx * vectype_size
5624 vec_dest = realign_load (msq, lsq, realignment_token)
5629 /* If the misalignment remains the same throughout the execution of the
5630 loop, we can create the init_addr and permutation mask at the loop
5631 preheader. Otherwise, it needs to be created inside the loop.
5632 This can only occur when vectorizing memory accesses in the inner-loop
5633 nested within an outer-loop that is being vectorized. */
5635 if (nested_in_vect_loop_p (loop, stmt)
5636 && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
5638 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5639 compute_in_loop = true;
5642 if ((alignment_support_scheme == dr_explicit_realign_optimized
5643 || alignment_support_scheme == dr_explicit_realign)
5644 && !compute_in_loop)
5646 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token,
5647 alignment_support_scheme, NULL_TREE,
5649 if (alignment_support_scheme == dr_explicit_realign_optimized)
5651 phi = SSA_NAME_DEF_STMT (msq);
5652 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5658 prev_stmt_info = NULL;
5659 for (j = 0; j < ncopies; j++)
5661 /* 1. Create the vector pointer update chain. */
5663 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
5665 &dummy, &ptr_incr, false,
5669 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
5671 for (i = 0; i < vec_num; i++)
5674 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
5677 /* 2. Create the vector-load in the loop. */
5678 switch (alignment_support_scheme)
5681 gcc_assert (aligned_access_p (first_dr));
5682 data_ref = build_fold_indirect_ref (dataref_ptr);
5684 case dr_unaligned_supported:
5686 int mis = DR_MISALIGNMENT (first_dr);
5687 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
5689 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
5691 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
5694 case dr_explicit_realign:
5697 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5699 if (compute_in_loop)
5700 msq = vect_setup_realignment (first_stmt, bsi,
5702 dr_explicit_realign,
5705 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5706 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5707 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5708 new_temp = make_ssa_name (vec_dest, new_stmt);
5709 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5710 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5711 copy_virtual_operands (new_stmt, stmt);
5712 mark_symbols_for_renaming (new_stmt);
5715 bump = size_binop (MULT_EXPR, vs_minus_1,
5716 TYPE_SIZE_UNIT (scalar_type));
5717 ptr = bump_vector_ptr (dataref_ptr, NULL_TREE, bsi, stmt, bump);
5718 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5721 case dr_explicit_realign_optimized:
5722 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5727 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5728 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5729 new_temp = make_ssa_name (vec_dest, new_stmt);
5730 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5731 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5732 mark_symbols_for_renaming (new_stmt);
5734 /* 3. Handle explicit realignment if necessary/supported. Create in
5735 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
5736 if (alignment_support_scheme == dr_explicit_realign_optimized
5737 || alignment_support_scheme == dr_explicit_realign)
5739 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
5740 if (!realignment_token)
5741 realignment_token = dataref_ptr;
5742 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5743 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
5745 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5746 new_temp = make_ssa_name (vec_dest, new_stmt);
5747 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5748 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5750 if (alignment_support_scheme == dr_explicit_realign_optimized)
5752 if (i == vec_num - 1 && j == ncopies - 1)
5753 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
5758 /* 4. Handle invariant-load. */
5761 gcc_assert (!strided_load);
5762 gcc_assert (nested_in_vect_loop_p (loop, stmt));
5767 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
5769 /* CHECKME: bitpos depends on endianness? */
5770 bitpos = bitsize_zero_node;
5771 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
5773 BIT_FIELD_REF_UNSIGNED (vec_inv) =
5774 TYPE_UNSIGNED (scalar_type);
5776 vect_create_destination_var (scalar_dest, NULL_TREE);
5777 new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
5778 new_temp = make_ssa_name (vec_dest, new_stmt);
5779 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5780 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5782 for (k = nunits - 1; k >= 0; --k)
5783 t = tree_cons (NULL_TREE, new_temp, t);
5784 /* FIXME: use build_constructor directly. */
5785 vec_inv = build_constructor_from_list (vectype, t);
5786 new_temp = vect_init_vector (stmt, vec_inv, vectype, bsi);
5787 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5790 gcc_unreachable (); /* FORNOW. */
5793 /* Collect vector loads and later create their permutation in
5794 vect_transform_strided_load (). */
5796 VEC_quick_push (tree, dr_chain, new_temp);
5798 /* Store vector loads in the corresponding SLP_NODE. */
5800 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5803 /* FORNOW: SLP with multiple types is unsupported. */
5809 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
5811 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5812 dr_chain = VEC_alloc (tree, heap, group_size);
5817 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5819 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5820 prev_stmt_info = vinfo_for_stmt (new_stmt);
5828 /* Function vectorizable_live_operation.
5830 STMT computes a value that is used outside the loop. Check if
5831 it can be supported. */
/* Analysis-only check for a statement whose value is used after the loop.

   STMT must satisfy STMT_VINFO_LIVE_P (asserted).  The visible tests look at
   whether STMT is a reduction definition, whether it is a
   GIMPLE_MODIFY_STMT whose left-hand side is an SSA_NAME, and whether it is
   nested in the loop being vectorized.  Every operand of the right-hand
   side is then passed to vect_is_simple_use and its definition type is
   compared against vect_invariant_def and vect_constant_def.  BSI and
   VEC_STMT are marked ATTRIBUTE_UNUSED; as the closing comment says, no
   transformation is generated.

   NOTE(review): the return type, several declarations, the braces and the
   return statements that follow each test are not visible in this listing -
   confirm the outcome of each test against the complete file.  */
5834 vectorizable_live_operation (tree stmt,
5835 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
5836 tree *vec_stmt ATTRIBUTE_UNUSED)
5839 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5840 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5841 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5846 enum vect_def_type dt;
5848 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5850 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5853 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5856 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
5859 /* FORNOW. CHECKME. */
5860 if (nested_in_vect_loop_p (loop, stmt))
5863 operation = GIMPLE_STMT_OPERAND (stmt, 1);
5864 op_type = TREE_OPERAND_LENGTH (operation);
5866 /* FORNOW: support only if all uses are invariant. This means
5867 that the scalar operations can remain in place, unvectorized.
5868 The original last scalar value that they compute will be used. */
5870 for (i = 0; i < op_type; i++)
5872 op = TREE_OPERAND (operation, i);
5873 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
5875 if (vect_print_dump_info (REPORT_DETAILS))
5876 fprintf (vect_dump, "use not simple.");
5880 if (dt != vect_invariant_def && dt != vect_constant_def)
5884 /* No transformation is required for the cases we currently support. */
5889 /* Function vect_is_simple_cond.
5892 LOOP - the loop that is being vectorized.
5893 COND - Condition that is checked for simple use.
5895 Returns whether a COND can be vectorized. Checks whether
5896 condition operands are supportable using vect_is_simple_use. */
/* Test whether COND is a comparison whose two operands the vectorizer can
   handle.  COND is first tested with COMPARISON_CLASS_P.  Each operand
   (operand 0 as LHS, operand 1 as RHS) is then handled the same way: an
   SSA_NAME is passed to vect_is_simple_use together with LOOP_VINFO, and
   anything else is compared against INTEGER_CST, REAL_CST and FIXED_CST.

   NOTE(review): the return type, the declarations of LHS, RHS and DEF, the
   braces and the return statements are not visible in this listing.  */
5899 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
5903 enum vect_def_type dt;
5905 if (!COMPARISON_CLASS_P (cond))
5908 lhs = TREE_OPERAND (cond, 0);
5909 rhs = TREE_OPERAND (cond, 1);
5911 if (TREE_CODE (lhs) == SSA_NAME)
5913 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5914 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
5917 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5918 && TREE_CODE (lhs) != FIXED_CST)
5921 if (TREE_CODE (rhs) == SSA_NAME)
5923 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5924 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
5927 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5928 && TREE_CODE (rhs) != FIXED_CST)
5934 /* vectorizable_condition.
5936 Check if STMT is conditional modify expression that can be vectorized.
5937 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5938 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5941 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Overview of vectorizable_condition as far as this listing shows it.

   Analysis part.  NCOPIES is the vectorization factor divided by the number
   of vector elements and is asserted to be at least 1.  The visible tests
   look at STMT_VINFO_RELEVANT_P, the def type (vect_loop_def), the SLP type,
   STMT_VINFO_LIVE_P, whether STMT is a GIMPLE_MODIFY_STMT whose right-hand
   side is a COND_EXPR, vect_is_simple_cond on the condition, the type of
   the first comparison operand against the element type of the vector type,
   and the then/else clauses (SSA_NAME checked by vect_is_simple_use,
   otherwise INTEGER_CST, REAL_CST or FIXED_CST).  The statement type is
   recorded as condition_vec_info_type and expand_vec_cond_expr_p is
   returned; presumably this happens only when VEC_STMT is NULL.

   Transformation part.  Vector definitions are fetched for both comparison
   operands and both clauses with vect_get_vec_def_for_operand, a comparison
   with the tree code of the scalar condition is built on them, and a
   VEC_COND_EXPR is assigned to a fresh SSA name of the vector destination.
   The statement is stored in *VEC_STMT and emitted at BSI.

   NOTE(review): the return type, several declarations, the braces, the
   condition guarding the early FORNOW return and the remaining return
   statements are not visible in this listing.  */
5944 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
5946 tree scalar_dest = NULL_TREE;
5947 tree vec_dest = NULL_TREE;
5948 tree op = NULL_TREE;
5949 tree cond_expr, then_clause, else_clause;
5950 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5951 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5952 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
5953 tree vec_compare, vec_cond_expr;
5955 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5956 enum machine_mode vec_mode;
5958 enum vect_def_type dt;
5959 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5960 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5962 gcc_assert (ncopies >= 1);
5964 return false; /* FORNOW */
5966 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5969 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5972 /* FORNOW: SLP not supported. */
5973 if (STMT_SLP_TYPE (stmt_info))
5976 /* FORNOW: not yet supported. */
5977 if (STMT_VINFO_LIVE_P (stmt_info))
5979 if (vect_print_dump_info (REPORT_DETAILS))
5980 fprintf (vect_dump, "value used after loop.");
5984 /* Is vectorizable conditional operation? */
5985 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5988 op = GIMPLE_STMT_OPERAND (stmt, 1);
5990 if (TREE_CODE (op) != COND_EXPR)
5993 cond_expr = TREE_OPERAND (op, 0);
5994 then_clause = TREE_OPERAND (op, 1);
5995 else_clause = TREE_OPERAND (op, 2);
5997 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
6000 /* We do not handle two different vector types for the condition
6002 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
6005 if (TREE_CODE (then_clause) == SSA_NAME)
6007 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6008 if (!vect_is_simple_use (then_clause, loop_vinfo,
6009 &then_def_stmt, &def, &dt))
6012 else if (TREE_CODE (then_clause) != INTEGER_CST
6013 && TREE_CODE (then_clause) != REAL_CST
6014 && TREE_CODE (then_clause) != FIXED_CST)
6017 if (TREE_CODE (else_clause) == SSA_NAME)
6019 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6020 if (!vect_is_simple_use (else_clause, loop_vinfo,
6021 &else_def_stmt, &def, &dt))
6024 else if (TREE_CODE (else_clause) != INTEGER_CST
6025 && TREE_CODE (else_clause) != REAL_CST
6026 && TREE_CODE (else_clause) != FIXED_CST)
6030 vec_mode = TYPE_MODE (vectype);
6034 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6035 return expand_vec_cond_expr_p (op, vec_mode);
6041 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
6042 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6044 /* Handle cond expr. */
6046 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
6048 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
6049 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
6050 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
6052 /* Arguments are ready. create the new vector stmt. */
6053 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
6054 vec_cond_lhs, vec_cond_rhs);
6055 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6056 vec_compare, vec_then_clause, vec_else_clause);
6058 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
6059 new_temp = make_ssa_name (vec_dest, *vec_stmt);
6060 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
6061 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
6067 /* Function vect_transform_stmt.
6069 Create a vectorized stmt to replace STMT, and insert it at BSI. */
/* Dispatch the transformation of STMT on STMT_VINFO_TYPE of its
   stmt_vec_info, calling the matching vectorizable_* routine with BSI and
   the address of a local VEC_STMT.

   The conversion, operation, assignment, load and store routines also
   receive SLP_NODE; the demotion, promotion, induction, condition, call and
   reduction cases assert that SLP_NODE is NULL.  For a store that is a
   strided access, *STRIDED_STORE is set to true.  A statement with
   STMT_VINFO_LIVE_P whose type is not reduc_vec_info_type is additionally
   passed to vectorizable_live_operation.  Finally VEC_STMT is recorded in
   STMT_VINFO_VEC_STMT of STMT and, when the related statement of STMT is
   marked STMT_VINFO_IN_PATTERN_P, also in the stmt_vec_info of that related
   statement.

   NOTE(review): the return type, the SLP_NODE parameter, the declaration of
   DONE, the braces, the break statements, the default label and the
   conditions guarding the final recording are not visible in this
   listing.  */
6072 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store,
6075 bool is_store = false;
6076 tree vec_stmt = NULL_TREE;
6077 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6078 tree orig_stmt_in_pattern;
6081 switch (STMT_VINFO_TYPE (stmt_info))
6083 case type_demotion_vec_info_type:
6084 gcc_assert (!slp_node);
6085 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
6089 case type_promotion_vec_info_type:
6090 gcc_assert (!slp_node);
6091 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
6095 case type_conversion_vec_info_type:
6096 done = vectorizable_conversion (stmt, bsi, &vec_stmt, slp_node);
6100 case induc_vec_info_type:
6101 gcc_assert (!slp_node);
6102 done = vectorizable_induction (stmt, bsi, &vec_stmt);
6106 case op_vec_info_type:
6107 done = vectorizable_operation (stmt, bsi, &vec_stmt, slp_node);
6111 case assignment_vec_info_type:
6112 done = vectorizable_assignment (stmt, bsi, &vec_stmt, slp_node);
6116 case load_vec_info_type:
6117 done = vectorizable_load (stmt, bsi, &vec_stmt, slp_node);
6121 case store_vec_info_type:
6122 done = vectorizable_store (stmt, bsi, &vec_stmt, slp_node);
6124 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6126 /* In case of interleaving, the whole chain is vectorized when the
6127 last store in the chain is reached. Store stmts before the last
6128 one are skipped, and their vec_stmt_info shouldn't be freed
6130 *strided_store = true;
6131 if (STMT_VINFO_VEC_STMT (stmt_info))
6138 case condition_vec_info_type:
6139 gcc_assert (!slp_node);
6140 done = vectorizable_condition (stmt, bsi, &vec_stmt);
6144 case call_vec_info_type:
6145 gcc_assert (!slp_node);
6146 done = vectorizable_call (stmt, bsi, &vec_stmt);
6149 case reduc_vec_info_type:
6150 gcc_assert (!slp_node);
6151 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
6156 if (!STMT_VINFO_LIVE_P (stmt_info))
6158 if (vect_print_dump_info (REPORT_DETAILS))
6159 fprintf (vect_dump, "stmt not supported.");
6164 if (STMT_VINFO_LIVE_P (stmt_info)
6165 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6167 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
6173 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6174 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
6175 if (orig_stmt_in_pattern)
6177 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
6178 /* STMT was inserted by the vectorizer to replace a computation idiom.
6179 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
6180 computed this idiom. We need to record a pointer to VEC_STMT in
6181 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
6182 documentation of vect_pattern_recog. */
6183 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
6185 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
6186 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
6195 /* This function builds ni_name = number of iterations loop executes
6196 on the loop preheader. */
/* Gimplify the iteration count of the loop described by LOOP_VINFO.
   An unshared copy of LOOP_VINFO_NITERS is forced into a gimple operand
   based on a new temporary named "niters".  A statement produced by that
   step is inserted on the preheader edge of the loop, and the insertion is
   asserted not to create a new basic block.

   NOTE(review): the return type, the declaration of PE, the braces, the
   condition guarding the insertion and the return statement are not visible
   in this listing; presumably NI_NAME is the value returned.  */
6199 vect_build_loop_niters (loop_vec_info loop_vinfo)
6201 tree ni_name, stmt, var;
6203 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6204 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6206 var = create_tmp_var (TREE_TYPE (ni), "niters");
6207 add_referenced_var (var);
6208 ni_name = force_gimple_operand (ni, &stmt, false, var);
6210 pe = loop_preheader_edge (loop);
6213 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6214 gcc_assert (!new_bb);
6221 /* This function generates the following statements:
6223 ni_name = number of iterations loop executes
6224 ratio = ni_name / vf
6225 ratio_mult_vf_name = ratio * vf
6227 and places them at the loop preheader edge. */
/* Compute three values for the loop described by LOOP_VINFO and hand them
   back through the pointer parameters:
     *NI_NAME_PTR             the result of vect_build_loop_niters;
     *RATIO_NAME_PTR          NI_NAME shifted right by exact_log2 (VF);
     *RATIO_MULT_VF_NAME_PTR  that ratio shifted left by exact_log2 (VF).
   VF is LOOP_VINFO_VECT_FACTOR.  Each shift is built with fold_build2; when
   the folded result is not a gimple value it is forced into a temporary
   ("bnd" or "ratio_mult_vf") and the generated statement is inserted on the
   loop preheader edge, which is asserted not to create a new basic block.

   NOTE(review): the return type, the NI_NAME_PTR parameter, several
   declarations and the braces are not visible in this listing.  */
6230 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6232 tree *ratio_mult_vf_name_ptr,
6233 tree *ratio_name_ptr)
6241 tree ratio_mult_vf_name;
6242 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6243 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
6244 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6247 pe = loop_preheader_edge (loop);
6249 /* Generate temporary variable that contains
6250 number of iterations loop executes. */
6252 ni_name = vect_build_loop_niters (loop_vinfo);
6253 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
6255 /* Create: ratio = ni >> log2(vf) */
6257 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
6258 if (!is_gimple_val (ratio_name))
6260 var = create_tmp_var (TREE_TYPE (ni), "bnd");
6261 add_referenced_var (var);
6263 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
6264 pe = loop_preheader_edge (loop);
6265 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6266 gcc_assert (!new_bb);
6269 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6271 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6272 ratio_name, log_vf);
6273 if (!is_gimple_val (ratio_mult_vf_name))
6275 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
6276 add_referenced_var (var);
6278 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
6280 pe = loop_preheader_edge (loop);
6281 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6282 gcc_assert (!new_bb);
6285 *ni_name_ptr = ni_name;
6286 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6287 *ratio_name_ptr = ratio_name;
6293 /* Function vect_update_ivs_after_vectorizer.
6295 "Advance" the induction variables of LOOP to the value they should take
6296 after the execution of LOOP. This is currently necessary because the
6297 vectorizer does not handle induction variables that are used after the
6298 loop. Such a situation occurs when the last iterations of LOOP are
6300 1. We introduced new uses after LOOP for IVs that were not originally used
6301 after LOOP: the IVs of LOOP are now used by an epilog loop.
6302 2. LOOP is going to be vectorized; this means that it will iterate N/VF
6303 times, whereas the loop IVs should be bumped N times.
6306 - LOOP - a loop that is going to be vectorized. The last few iterations
6307 of LOOP were peeled.
6308 - NITERS - the number of iterations that LOOP executes (before it is
6309 vectorized). i.e, the number of times the ivs should be bumped.
6310 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
6311 coming out from LOOP on which there are uses of the LOOP ivs
6312 (this is the path from LOOP->exit to epilog_loop->preheader).
6314 The new definitions of the ivs are placed in LOOP->exit.
6315 The phi args associated with the edge UPDATE_E in the bb
6316 UPDATE_E->dest are updated accordingly.
6318 Assumption 1: Like the rest of the vectorizer, this function assumes
6319 a single loop exit that has a single predecessor.
6321 Assumption 2: The phi nodes in the LOOP header and in update_bb are
6322 organized in the same order.
6324 Assumption 3: The access function of the ivs is simple enough (see
6325 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
6327 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
6328 coming out of LOOP on which the ivs of LOOP are used (this is the path
6329 that leads to the epilog loop; other paths skip the epilog loop). This
6330 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
6331 needs to have its phis updated.
/* Walk the phi nodes of the loop header and of UPDATE_E->dest in lockstep
   and give each induction variable the value it has after NITERS
   iterations.

   EXIT_BB is the destination of the single exit edge of the loop and is
   asserted to have a single predecessor.  Virtual phis and reduction phis
   are reported in the dump and, per the comments, skipped.  For every other
   phi the scalar evolution of its result is analyzed; the evolution part in
   this loop is used as the step and must not itself be a chrec (asserted).
   The new value is INIT + NITERS * STEP, built with POINTER_PLUS_EXPR when
   INIT has pointer type and PLUS_EXPR otherwise.  It is gimplified at the
   end of EXIT_BB and stored as the argument of the matching phi in
   UPDATE_E->dest for edge UPDATE_E.

   NOTE(review): the return type, the UPDATE_E parameter, the declarations
   of PHI, PHI1, STEP_EXPR and INIT_EXPR, the loop condition, the braces and
   the continue statements are not visible in this listing.  */
6335 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
6338 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6339 basic_block exit_bb = single_exit (loop)->dest;
6341 basic_block update_bb = update_e->dest;
6343 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
6345 /* Make sure there exists a single-predecessor exit bb: */
6346 gcc_assert (single_pred_p (exit_bb));
6348 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
6350 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
6352 tree access_fn = NULL;
6353 tree evolution_part;
6356 tree var, ni, ni_name;
6357 block_stmt_iterator last_bsi;
6359 if (vect_print_dump_info (REPORT_DETAILS))
6361 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
6362 print_generic_expr (vect_dump, phi, TDF_SLIM);
6365 /* Skip virtual phi's. */
6366 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
6368 if (vect_print_dump_info (REPORT_DETAILS))
6369 fprintf (vect_dump, "virtual phi. skip.");
6373 /* Skip reduction phis. */
6374 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
6376 if (vect_print_dump_info (REPORT_DETAILS))
6377 fprintf (vect_dump, "reduc phi. skip.");
6381 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
6382 gcc_assert (access_fn);
6384 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
6385 gcc_assert (evolution_part != NULL_TREE);
6387 /* FORNOW: We do not support IVs whose evolution function is a polynomial
6388 of degree >= 2 or exponential. */
6389 gcc_assert (!tree_is_chrec (evolution_part));
6391 step_expr = evolution_part;
6392 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
6395 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
6396 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
6398 fold_convert (sizetype,
6399 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
6400 niters, step_expr)));
6402 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
6403 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
6404 fold_convert (TREE_TYPE (init_expr),
6411 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
6412 add_referenced_var (var);
6414 last_bsi = bsi_last (exit_bb);
6415 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
6416 true, BSI_SAME_STMT);
6418 /* Fix phi expressions in the successor bb. */
6419 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
6424 /* Function vect_do_peeling_for_loop_bound
6426 Peel the last iterations of the loop represented by LOOP_VINFO.
6427 The peeled iterations form a new epilog loop. Given that the loop now
6428 iterates NITERS times, the new epilog loop iterates
6429 NITERS % VECTORIZATION_FACTOR times.
6431 The original loop will later be made to iterate
6432 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).
     Outline of the work done below: the iteration-count temporaries are
     generated with vect_generate_tmps_on_preheader; a threshold TH is derived
     from LOOP_VINFO_COST_MODEL_MIN_ITERS and from
     PARAM_MIN_VECT_LOOP_BOUND * VF - 1; the loop is peeled at its single exit
     edge with slpeel_tree_peel_loop_to_edge; finally the IVs of the original
     loop are updated (vect_update_ivs_after_vectorizer) along UPDATE_E, a
     predecessor edge of the new loop's preheader block. */
6435 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
6437 tree ni_name, ratio_mult_vf_name;
6438 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6439 struct loop *new_loop;
6441 basic_block preheader;
6444 int min_scalar_loop_bound;
6445 int min_profitable_iters;
6447 if (vect_print_dump_info (REPORT_DETAILS))
6448 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
6450 initialize_original_copy_tables ();
6452 /* Generate the following variables on the preheader of original loop:
6454 ni_name = number of iterations the original loop executes
6455 ratio = ni_name / vf
6456 ratio_mult_vf_name = ratio * vf */
6457 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
6458 &ratio_mult_vf_name, ratio);
6460 loop_num = loop->num;
6462 /* Analyze cost to set threshold for vectorized loop. */
6463 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
6464 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
6465 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
6467 /* Use the cost model only if it is more conservative than user specified
6470 th = (unsigned) min_scalar_loop_bound;
6471 if (min_profitable_iters
6472 && (!min_scalar_loop_bound
6473 || min_profitable_iters > min_scalar_loop_bound))
6474 th = (unsigned) min_profitable_iters;
6476 if (((LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
6477 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6478 && vect_print_dump_info (REPORT_DETAILS))
6479 fprintf (vect_dump, "vectorization may not be profitable.");
6481 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
6482 ratio_mult_vf_name, ni_name, false,
6484 gcc_assert (new_loop);
6485 gcc_assert (loop_num == loop->num);
6486 #ifdef ENABLE_CHECKING
6487 slpeel_verify_cfg_after_peeling (loop, new_loop);
6490 /* A guard that controls whether the new_loop is to be executed or skipped
6491 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
6492 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
6493 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
6494 is on the path where the LOOP IVs are used and need to be updated. */
6496 preheader = loop_preheader_edge (new_loop)->src;
6497 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
6498 update_e = EDGE_PRED (preheader, 0);
6500 update_e = EDGE_PRED (preheader, 1);
6502 /* Update IVs of original loop as if they were advanced
6503 by ratio_mult_vf_name steps. */
6504 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
6506 /* After peeling we have to reset scalar evolution analyzer. */
6509 free_original_copy_tables ();
6513 /* Function vect_gen_niters_for_prolog_loop
6515 Set the number of iterations for the loop represented by LOOP_VINFO
6516 to the minimum between LOOP_NITERS (the original iteration count of the loop)
6517 and the misalignment of DR - the data reference recorded in
6518 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
6519 this loop, the data reference DR will refer to an aligned location.
6521 The following computation is generated:
6523 If the misalignment of DR is known at compile time:
6524 addr_mis = int mis = DR_MISALIGNMENT (dr);
6525 Else, compute address misalignment in bytes:
6526 addr_mis = addr & (vectype_size - 1)
6528 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
6530 (elem_size = element type size; an element is the scalar element
6531 whose type is the inner type of the vectype)
6535 prolog_niters = min ( LOOP_NITERS ,
6536 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
6537 where group_size is the size of the interleaved group.
6539 The above formulas assume that VF == number of elements in the vector. This
6540 may not hold when there are multiple-types in the loop.
6541 In this case, for some data-references in the loop the VF does not represent
6542 the number of elements that fit in the vector. Therefore, instead of VF we
6543 use TYPE_VECTOR_SUBPARTS.
     Note on the code below: the compile-time-known branch is taken when
     LOOP_PEELING_FOR_ALIGNMENT (LOOP_VINFO) is positive, and that value is
     what is used as the byte misalignment. The MIN_EXPR against LOOP_NITERS
     is only built when LOOP_NITERS is not an INTEGER_CST. The resulting
     expression is gimplified into a "prolog_loop_niters" temporary and any
     statements produced are inserted on the loop preheader edge. */
6546 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
6548 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
6549 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6551 tree iters, iters_name;
6554 tree dr_stmt = DR_STMT (dr);
6555 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
6556 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6557 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
6558 tree niters_type = TREE_TYPE (loop_niters);
6560 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
6561 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
6563 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6565 /* For interleaved access element size must be multiplied by the size of
6566 the interleaved group. */
6567 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
6568 DR_GROUP_FIRST_DR (stmt_info)));
6569 element_size *= group_size;
6572 pe = loop_preheader_edge (loop);
6574 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
6576 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
6577 int elem_misalign = byte_misalign / element_size;
6579 if (vect_print_dump_info (REPORT_DETAILS))
6580 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
6581 iters = build_int_cst (niters_type,
6582 (nelements - elem_misalign)&(nelements/group_size-1));
6586 tree new_stmts = NULL_TREE;
6587 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
6588 &new_stmts, NULL_TREE, loop);
6589 tree ptr_type = TREE_TYPE (start_addr);
6590 tree size = TYPE_SIZE (ptr_type);
6591 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
6592 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
6593 tree elem_size_log =
6594 build_int_cst (type, exact_log2 (vectype_align/nelements));
6595 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
6596 tree nelements_tree = build_int_cst (type, nelements);
6600 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
6601 gcc_assert (!new_bb);
6603 /* Create: byte_misalign = addr & (vectype_size - 1), where the mask
     constant is built from vectype_align - 1. */
6605 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
6607 /* Create: elem_misalign = byte_misalign / element_size, emitted as a
     right shift by elem_size_log = log2 (vectype_align / nelements). */
6609 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
6611 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
6612 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
6613 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
6614 iters = fold_convert (niters_type, iters);
6617 /* Create: prolog_loop_niters = min (iters, loop_niters) */
6618 /* If the loop bound is known at compile time we already verified that it is
6619 greater than vf; since the misalignment ('iters') is at most vf, there's
6620 no need to generate the MIN_EXPR in this case. */
6621 if (TREE_CODE (loop_niters) != INTEGER_CST)
6622 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
6624 if (vect_print_dump_info (REPORT_DETAILS))
6626 fprintf (vect_dump, "niters for prolog loop: ");
6627 print_generic_expr (vect_dump, iters, TDF_SLIM);
6630 var = create_tmp_var (niters_type, "prolog_loop_niters");
6631 add_referenced_var (var);
6632 iters_name = force_gimple_operand (iters, &stmt, false, var);
6634 /* Insert stmt on loop preheader edge. */
6637 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6638 gcc_assert (!new_bb);
6645 /* Function vect_update_init_of_dr
6647 NITERS iterations were peeled from LOOP. DR represents a data reference
6648 in LOOP. This function updates the information recorded in DR to
6649 account for the fact that the first NITERS iterations had already been
6650 executed. Specifically, it updates the OFFSET field of DR:
     DR_OFFSET (DR) becomes DR_OFFSET (DR) + NITERS * DR_STEP (DR). The
     product is built in the type of NITERS and the sum in the type of the
     existing offset. */
6653 vect_update_init_of_dr (struct data_reference *dr, tree niters)
6655 tree offset = DR_OFFSET (dr);
6657 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
6658 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
6659 DR_OFFSET (dr) = offset;
6663 /* Function vect_update_inits_of_drs
6665 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
6666 This function updates the information recorded for the data references in
6667 the loop to account for the fact that the first NITERS iterations had
6668 already been executed. Specifically, it calls vect_update_init_of_dr with
6669 NITERS on every data reference in LOOP_VINFO_DATAREFS (LOOP_VINFO). */
6672 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
6675 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
6676 struct data_reference *dr;
6678 if (vect_print_dump_info (REPORT_DETAILS))
6679 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
6681 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
6682 vect_update_init_of_dr (dr, niters);
6686 /* Function vect_do_peeling_for_alignment
6688 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
6689 'niters' is set to the misalignment of one of the data references in the
6690 loop, thereby forcing it to refer to an aligned location at the beginning
6691 of the execution of this loop. The data reference for which we are
6692 peeling is recorded in LOOP_VINFO_UNALIGNED_DR.
     The prolog iteration count comes from vect_gen_niters_for_prolog_loop and
     the peeling is done on the loop preheader edge. Afterwards
     LOOP_VINFO_NITERS is reduced by that count and the data references are
     adjusted with vect_update_inits_of_drs. */
6695 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
6697 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6698 tree niters_of_prolog_loop, ni_name;
6700 struct loop *new_loop;
6702 if (vect_print_dump_info (REPORT_DETAILS))
6703 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
6705 initialize_original_copy_tables ();
6707 ni_name = vect_build_loop_niters (loop_vinfo);
6708 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
6710 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
6712 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
6713 niters_of_prolog_loop, ni_name, true, 0);
6714 gcc_assert (new_loop);
6715 #ifdef ENABLE_CHECKING
6716 slpeel_verify_cfg_after_peeling (new_loop, loop);
6719 /* Update number of times loop executes. */
6720 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
6721 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
6722 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
6724 /* Update the init conditions of the access functions of all data refs. */
6725 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
6727 /* After peeling we have to reset scalar evolution analyzer. */
6730 free_original_copy_tables ();
6734 /* Function vect_create_cond_for_align_checks.
6736 Create a conditional expression that represents the alignment checks for
6737 all of data references (array element references) whose alignment must be checked at runtime.
6741 LOOP_VINFO - two fields of the loop information are used.
6742 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
6743 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
6746 COND_EXPR_STMT_LIST - statements needed to construct the conditional expression are appended to this list.
6748 The returned value is the conditional expression to be used in the if
6749 statement that controls which version of the loop gets executed at runtime.
6751 The algorithm makes two assumptions:
6752 1) The number of bytes "n" in a vector is a power of 2.
6753 2) An address "a" is aligned if a%n is zero and that this
6754 test can be done as a&(n-1) == 0. For example, for 16
6755 byte vectors the test is a&0xf == 0. */
6758 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
6759 tree *cond_expr_stmt_list)
6761 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6762 VEC(tree,heap) *may_misalign_stmts
6763 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
6765 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
6769 tree int_ptrsize_type;
6771 tree or_tmp_name = NULL_TREE;
6772 tree and_tmp, and_tmp_name, and_stmt;
6775 /* Check that mask is one less than a power of 2, i.e., mask is
6776 all zeros followed by all ones. */
6777 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
6779 /* CHECKME: what is the best integer or unsigned type to use to hold a
6780 cast from a pointer value? */
6781 psize = TYPE_SIZE (ptr_type_node);
6783 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
6785 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
6786 of the first vector of the i'th data reference. The addresses are
     combined with a bitwise OR (BIT_IOR_EXPR), not a logical one. */
6788 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
6790 tree new_stmt_list = NULL_TREE;
6792 tree addr_tmp, addr_tmp_name, addr_stmt;
6793 tree or_tmp, new_or_tmp_name, or_stmt;
6795 /* create: addr_tmp = (int)(address_of_first_vector) */
6796 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
6797 &new_stmt_list, NULL_TREE, loop);
6799 if (new_stmt_list != NULL_TREE)
6800 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
6802 sprintf (tmp_name, "%s%d", "addr2int", i);
6803 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6804 add_referenced_var (addr_tmp);
6805 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
6806 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
6807 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
6808 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
6809 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
6811 /* The addresses are ORed together. */
6813 if (or_tmp_name != NULL_TREE)
6815 /* create: or_tmp = or_tmp | addr_tmp */
6816 sprintf (tmp_name, "%s%d", "orptrs", i);
6817 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6818 add_referenced_var (or_tmp);
6819 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
6820 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
6821 or_tmp_name, addr_tmp_name);
6822 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
6823 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
6824 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
6825 or_tmp_name = new_or_tmp_name;
6828 or_tmp_name = addr_tmp_name;
6832 mask_cst = build_int_cst (int_ptrsize_type, mask);
6834 /* create: and_tmp = or_tmp & mask */
6835 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
6836 add_referenced_var (and_tmp);
6837 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
6839 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
6840 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
6841 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
6842 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
6844 /* Make and_tmp the left operand of the conditional test against zero.
6845 If and_tmp has a nonzero bit then some address is unaligned. */
6846 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
6847 return build2 (EQ_EXPR, boolean_type_node,
6848 and_tmp_name, ptrsize_zero);
6851 /* Function vect_vfa_segment_size.
6853 Create an expression that computes the size of the segment
6854 that will be accessed for a data reference. The function takes into
6855 account that realignment loads may access one more vector.
6858 DR: The data reference.
6859 VECT_FACTOR: vectorization factor.
6861 Return an expression whose value is the size of segment which will be
6865 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
6867 tree segment_length;
6869 if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
6872 build_int_cst (integer_type_node,
6873 GET_MODE_SIZE (TYPE_MODE (STMT_VINFO_VECTYPE
6874 (vinfo_for_stmt (DR_STMT (dr))))));
6877 fold_convert (sizetype,
6878 fold_build2 (PLUS_EXPR, integer_type_node,
6879 fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
6886 fold_convert (sizetype,
6887 fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
6891 return segment_length;
6894 /* Function vect_create_cond_for_alias_checks.
6896 Create a conditional expression that represents the run-time checks for
6897 overlapping of address ranges represented by a list of data reference
6898 relations passed as input.
6901 COND_EXPR - input conditional expression. New conditions will be chained
6902 with logical and operation.
6903 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_DDRS contains the list of ddrs
6907 COND_EXPR - conditional expression.
6908 COND_EXPR_STMT_LIST - statements needed to construct the conditional
6910 The returned value is the conditional expression to be used in the if
6911 statement that controls which version of the loop gets executed at runtime.
6915 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
6917 tree * cond_expr_stmt_list)
6919 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6920 VEC (ddr_p, heap) * may_alias_ddrs =
6921 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
6923 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
6927 tree part_cond_expr;
6929 /* Create expression
6930 ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
6931 || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
6935 ((store_ptr_n + store_segment_length_n) < load_ptr_n)
6936 || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
6938 if (VEC_empty (ddr_p, may_alias_ddrs))
6941 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
6943 tree stmt_a = DR_STMT (DDR_A (ddr));
6944 tree stmt_b = DR_STMT (DDR_B (ddr));
6947 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
6950 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
6953 tree segment_length_a = vect_vfa_segment_size (DDR_A (ddr), vect_factor);
6954 tree segment_length_b = vect_vfa_segment_size (DDR_B (ddr), vect_factor);
6956 if (vect_print_dump_info (REPORT_DR_DETAILS))
6959 "create runtime check for data references ");
6960 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
6961 fprintf (vect_dump, " and ");
6962 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
6967 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
6968 fold_build2 (LT_EXPR, boolean_type_node,
6969 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
6973 fold_build2 (LT_EXPR, boolean_type_node,
6974 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
/* The per-pair condition is either chained onto *COND_EXPR with
   TRUTH_AND_EXPR or installed as *COND_EXPR itself. NOTE(review): the
   choice presumably depends on whether *COND_EXPR is already set - confirm. */
6980 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
6981 *cond_expr, part_cond_expr);
6983 *cond_expr = part_cond_expr;
6985 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
6986 fprintf (vect_dump, "created %u versioning for alias checks.\n",
6987 VEC_length (ddr_p, may_alias_ddrs));
6991 /* Remove a group of stores (for SLP or interleaving), free their
6995 vect_remove_stores (tree first_stmt)
6998 tree next = first_stmt;
7000 stmt_vec_info next_stmt_info;
7001 block_stmt_iterator next_si;
7005 /* Free the attached stmt_vec_info and remove the stmt. */
7006 next_si = bsi_for_stmt (next);
7007 bsi_remove (&next_si, true);
7008 next_stmt_info = vinfo_for_stmt (next);
7009 ann = stmt_ann (next);
/* Fetch the next member of the group before the stmt_vec_info that holds
   the link is freed. */
7010 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7011 free (next_stmt_info);
7012 set_stmt_info (ann, NULL);
7018 /* Vectorize SLP instance tree in postorder: the left and right children of
     NODE are scheduled recursively before NODE itself. For NODE, the first
     scalar stmt in SLP_TREE_SCALAR_STMTS is the one handed to
     vect_transform_stmt. VEC_STMTS_SIZE is used both to allocate
     SLP_TREE_VEC_STMTS and as SLP_TREE_NUMBER_OF_VEC_STMTS for every node. */
7021 vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
7024 bool strided_store, is_store;
7025 block_stmt_iterator si;
7026 stmt_vec_info stmt_info;
7031 vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
7032 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
7034 stmt = VEC_index(tree, SLP_TREE_SCALAR_STMTS (node), 0);
7035 stmt_info = vinfo_for_stmt (stmt);
7036 SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size);
7037 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
7039 if (vect_print_dump_info (REPORT_DETAILS))
7041 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
7042 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7045 si = bsi_for_stmt (stmt);
7046 is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
7049 if (DR_GROUP_FIRST_DR (stmt_info))
7050 /* If IS_STORE is TRUE, the vectorization of the
7051 interleaving chain was completed - free all the stores in
7053 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
7055 /* FORNOW: SLP originates only from strided stores. */
7061 /* FORNOW: SLP originates only from strided stores. */
/* Schedule every SLP instance recorded in LOOP_VINFO_SLP_INSTANCES
   (LOOP_VINFO) by calling vect_schedule_slp_instance on its tree. NUNITS is
   the divisor in VF * GROUP_SIZE / NUNITS, the number of vector stmts
   requested for each node of an instance. */
7067 vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
7069 VEC (slp_instance, heap) *slp_instances =
7070 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
7071 slp_instance instance;
7072 unsigned int vec_stmts_size;
7073 unsigned int group_size, i;
7074 unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7075 bool is_store = false;
7077 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
7079 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
7080 /* For each SLP instance calculate number of vector stmts to be created
7081 for the scalar stmts in each node of the SLP tree. Number of vector
7082 elements in one vector iteration is the number of scalar elements in
7083 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
7085 vec_stmts_size = vectorization_factor * group_size / nunits;
7087 /* Schedule the tree of INSTANCE. */
7088 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
7091 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
7092 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
7093 fprintf (vect_dump, "vectorizing stmts using SLP.");
7100 /* Function vect_transform_loop.
7102 The analysis phase has determined that the loop is vectorizable.
7103 Vectorize the loop - create vectorized stmts to replace the scalar
7104 stmts in the loop, and update the loop exit condition.
     Outline of the steps below:
     1) If there are stmts that may be misaligned or DDRs that may alias,
     build the runtime condition, version the loop (loop_version) and split
     the exit edge of LOOP so that loop-exit phis live in a new block.
     2) Peel for alignment when LOOP_PEELING_FOR_ALIGNMENT is nonzero.
     3) Peel for the loop bound when the iteration count is unknown or is not
     a multiple of the vectorization factor; this also sets RATIO.
     4) Split the preheader edge, then walk the basic blocks, handing
     induction phis and stmts to vect_transform_stmt and scheduling the SLP
     instances with vect_schedule_slp.
     5) Make the loop iterate RATIO times (slpeel_make_loop_iterate_ntimes)
     and update the SSA form. */
7107 vect_transform_loop (loop_vec_info loop_vinfo)
7109 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7110 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7111 int nbbs = loop->num_nodes;
7112 block_stmt_iterator si, next_si;
7115 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7117 bool slp_scheduled = false;
7118 unsigned int nunits;
7120 if (vect_print_dump_info (REPORT_DETAILS))
7121 fprintf (vect_dump, "=== vec_transform_loop ===");
7123 /* If the loop has data references that may or may not be aligned or/and
7124 has data reference relations whose independence was not proven then
7125 two versions of the loop need to be generated, one which is vectorized
7126 and one which isn't. A test is then generated to control which of the
7127 loops is executed. The test checks for the alignment of all of the
7128 data references that may or may not be aligned. An additional
7129 sequence of runtime tests is generated for each pair of DDRs whose
7130 independence was not proven. The vectorized version of loop is
7131 executed only if both alias and alignment tests are passed. */
7133 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7134 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7137 tree cond_expr = NULL_TREE;
7138 tree cond_expr_stmt_list = NULL_TREE;
7139 basic_block condition_bb;
7140 block_stmt_iterator cond_exp_bsi;
7141 basic_block merge_bb;
7142 basic_block new_exit_bb;
7144 tree orig_phi, new_phi, arg;
7145 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
7146 tree gimplify_stmt_list;
7148 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
7150 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
7152 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7153 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
7154 &cond_expr_stmt_list);
7157 fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
7159 force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
7161 append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
7163 initialize_original_copy_tables ();
7164 nloop = loop_version (loop, cond_expr, &condition_bb,
7165 prob, prob, REG_BR_PROB_BASE - prob, true);
7166 free_original_copy_tables();
7168 /** Loop versioning violates an assumption we try to maintain during
7169 vectorization - that the loop exit block has a single predecessor.
7170 After versioning, the exit block of both loop versions is the same
7171 basic block (i.e. it has two predecessors). Just in order to simplify
7172 following transformations in the vectorizer, we fix this situation
7173 here by adding a new (empty) block on the exit-edge of the loop,
7174 with the proper loop-exit phis to maintain loop-closed-form. **/
7176 merge_bb = single_exit (loop)->dest;
7177 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
7178 new_exit_bb = split_edge (single_exit (loop));
7179 new_exit_e = single_exit (loop);
7180 e = EDGE_SUCC (new_exit_bb, 0);
7182 for (orig_phi = phi_nodes (merge_bb); orig_phi;
7183 orig_phi = PHI_CHAIN (orig_phi))
7185 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
7187 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
7188 add_phi_arg (new_phi, arg, new_exit_e);
7189 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
7192 /** end loop-exit-fixes after versioning **/
7194 update_ssa (TODO_update_ssa);
7195 cond_exp_bsi = bsi_last (condition_bb);
7196 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
7199 /* CHECKME: we wouldn't need this if we called update_ssa once
7201 bitmap_zero (vect_memsyms_to_rename);
7203 /* Peel the loop if there are data refs with unknown alignment.
7204 Only one data ref with unknown store is allowed. */
7206 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
7207 vect_do_peeling_for_alignment (loop_vinfo);
7209 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
7210 compile time constant), or it is a constant that doesn't divide by the
7211 vectorization factor, then an epilog loop needs to be created.
7212 We therefore duplicate the loop: the original loop will be vectorized,
7213 and will compute the first (n/VF) iterations. The second copy of the loop
7214 will remain scalar and will compute the remaining (n%VF) iterations.
7215 (VF is the vectorization factor). */
7217 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7218 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7219 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
7220 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
7222 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
7223 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
7225 /* 1) Make sure the loop header has exactly two entries
7226 2) Make sure we have a preheader basic block. */
7228 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7230 split_edge (loop_preheader_edge (loop));
7232 /* FORNOW: the vectorizer supports only loops whose body consists
7233 of one basic block (header + empty latch). When the vectorizer
7234 supports more involved loop forms, the order in which the BBs are
7235 traversed needs to be reconsidered. */
7237 for (i = 0; i < nbbs; i++)
7239 basic_block bb = bbs[i];
7240 stmt_vec_info stmt_info;
7243 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
7245 if (vect_print_dump_info (REPORT_DETAILS))
7247 fprintf (vect_dump, "------>vectorizing phi: ");
7248 print_generic_expr (vect_dump, phi, TDF_SLIM);
7250 stmt_info = vinfo_for_stmt (phi);
7254 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7255 && !STMT_VINFO_LIVE_P (stmt_info))
7258 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7259 != (unsigned HOST_WIDE_INT) vectorization_factor)
7260 && vect_print_dump_info (REPORT_DETAILS))
7261 fprintf (vect_dump, "multiple-types.");
7263 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
7265 if (vect_print_dump_info (REPORT_DETAILS))
7266 fprintf (vect_dump, "transform phi.");
7267 vect_transform_stmt (phi, NULL, NULL, NULL);
7271 for (si = bsi_start (bb); !bsi_end_p (si);)
7273 tree stmt = bsi_stmt (si);
7276 if (vect_print_dump_info (REPORT_DETAILS))
7278 fprintf (vect_dump, "------>vectorizing statement: ");
7279 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7282 stmt_info = vinfo_for_stmt (stmt);
7284 /* vector stmts created in the outer-loop during vectorization of
7285 stmts in an inner-loop may not have a stmt_info, and do not
7286 need to be vectorized. */
7293 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7294 && !STMT_VINFO_LIVE_P (stmt_info))
7300 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
7302 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7303 if (!STMT_SLP_TYPE (stmt_info)
7304 && nunits != (unsigned int) vectorization_factor
7305 && vect_print_dump_info (REPORT_DETAILS))
7306 /* For SLP VF is set according to unrolling factor, and not to
7307 vector size, hence for SLP this print is not valid. */
7308 fprintf (vect_dump, "multiple-types.");
7310 /* SLP. Schedule all the SLP instances when the first SLP stmt is
7312 if (STMT_SLP_TYPE (stmt_info))
7316 slp_scheduled = true;
7318 if (vect_print_dump_info (REPORT_DETAILS))
7319 fprintf (vect_dump, "=== scheduling SLP instances ===");
7321 is_store = vect_schedule_slp (loop_vinfo, nunits);
7323 /* IS_STORE is true if STMT is a store. Stores cannot be of
7324 hybrid SLP type. They are removed in
7325 vect_schedule_slp_instance and their vinfo is destroyed. */
7333 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
7334 if (PURE_SLP_STMT (stmt_info))
7341 /* -------- vectorize statement ------------ */
7342 if (vect_print_dump_info (REPORT_DETAILS))
7343 fprintf (vect_dump, "transform statement.");
7345 strided_store = false;
7346 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
7350 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
7352 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7353 interleaving chain was completed - free all the stores in
7355 tree next = DR_GROUP_FIRST_DR (stmt_info);
7357 stmt_vec_info next_stmt_info;
7361 next_si = bsi_for_stmt (next);
7362 next_stmt_info = vinfo_for_stmt (next);
7363 /* Free the attached stmt_vec_info and remove the stmt. */
7364 ann = stmt_ann (next);
7365 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7366 free (next_stmt_info);
7367 set_stmt_info (ann, NULL);
7368 bsi_remove (&next_si, true);
7371 bsi_remove (&si, true);
7376 /* Free the attached stmt_vec_info and remove the stmt. */
7377 ann = stmt_ann (stmt);
7379 set_stmt_info (ann, NULL);
7380 bsi_remove (&si, true);
7388 slpeel_make_loop_iterate_ntimes (loop, ratio);
7390 mark_set_for_renaming (vect_memsyms_to_rename);
7392 /* The memory tags and pointers in vectorized statements need to
7393 have their SSA forms updated. FIXME, why can't this be delayed
7394 until all the loops have been transformed? */
7395 update_ssa (TODO_update_ssa);
7397 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7398 fprintf (vect_dump, "LOOP VECTORIZED.");
7399 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7400 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");