1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *, slp_tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, struct loop*, tree, tree *, tree *, bool, tree, bool *);
53 static tree vect_create_addr_base_for_vector_ref
54 (tree, tree *, tree, struct loop *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree, tree, block_stmt_iterator *);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static int vect_min_worthwhile_factor (enum tree_code);
/* Function cost_for_stmt.

   Return the scalar cost of STMT, chosen by the STMT_VINFO_TYPE recorded in
   its stmt_vec_info: TARG_SCALAR_LOAD_COST for loads, TARG_SCALAR_STORE_COST
   for stores and TARG_SCALAR_STMT_COST for the other kinds listed below.
   NOTE(review): what happens for undef_vec_info_type is not visible here -
   confirm against the complete function.  */
76 cost_for_stmt (tree stmt)
78 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
80 switch (STMT_VINFO_TYPE (stmt_info))
82 case load_vec_info_type:
83 return TARG_SCALAR_LOAD_COST;
84 case store_vec_info_type:
85 return TARG_SCALAR_STORE_COST;
86 case op_vec_info_type:
87 case condition_vec_info_type:
88 case assignment_vec_info_type:
89 case reduc_vec_info_type:
90 case induc_vec_info_type:
91 case type_promotion_vec_info_type:
92 case type_demotion_vec_info_type:
93 case type_conversion_vec_info_type:
94 case call_vec_info_type:
95 return TARG_SCALAR_STMT_COST;
96 case undef_vec_info_type:
103 /* Function vect_estimate_min_profitable_iters
105 Return the number of iterations required for the vector version of the
106 loop to be profitable relative to the cost of the scalar version of the
109 TODO: Take profile info into account before making vectorization
110 decisions, if available.

LOOP_VINFO describes the loop being costed.  The value returned is the
threshold for the run-time guard "niters <= threshold": the computed
iteration count is raised to at least the vectorization factor and then
decremented by one (see the end of the function).  */
113 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
116 int min_profitable_iters;
117 int peel_iters_prologue;
118 int peel_iters_epilogue;
119 int vec_inside_cost = 0;
120 int vec_outside_cost = 0;
121 int scalar_single_iter_cost = 0;
122 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
124 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
125 int nbbs = loop->num_nodes;
127 int peel_guard_costs = 0;
128 int innerloop_iters = 0, factor;
129 VEC (slp_instance, heap) *slp_instances;
130 slp_instance instance;
132 /* Cost model disabled. */
133 if (!flag_vect_cost_model)
135 if (vect_print_dump_info (REPORT_DETAILS))
136 fprintf (vect_dump, "cost model disabled.");
140 /* Requires loop versioning tests to handle misalignment.
141 FIXME: Make cost depend on number of stmts in may_misalign list. */
143 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
145 vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
146 if (vect_print_dump_info (REPORT_DETAILS))
147 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
151 /* Count statements in scalar loop. Using this as scalar cost for a single
154 TODO: Add outer loop support.
156 TODO: Consider assigning different costs to different scalar
161 innerloop_iters = 50; /* FIXME */
163 for (i = 0; i < nbbs; i++)
165 block_stmt_iterator si;
166 basic_block bb = bbs[i];
168 if (bb->loop_father == loop->inner)
169 factor = innerloop_iters;
173 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
175 tree stmt = bsi_stmt (si);
176 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
177 if (!STMT_VINFO_RELEVANT_P (stmt_info)
178 && !STMT_VINFO_LIVE_P (stmt_info))
180 scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
181 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
182 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
183 some of the "outside" costs are generated inside the outer-loop. */
184 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
188 /* Add additional cost for the peeled instructions in prologue and epilogue
191 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
192 at compile-time - we assume it's vf/2 (the worst would be vf-1).
194 TODO: Build an expression that represents peel_iters for prologue and
195 epilogue to be used in a run-time test. */
/* A negative BYTE_MISALIGN is treated below as "peeling amount unknown at
   compile time".  */
197 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
199 if (byte_misalign < 0)
201 peel_iters_prologue = vf/2;
202 if (vect_print_dump_info (REPORT_DETAILS))
203 fprintf (vect_dump, "cost model: "
204 "prologue peel iters set to vf/2.");
206 /* If peeling for alignment is unknown, loop bound of main loop becomes
208 peel_iters_epilogue = vf/2;
209 if (vect_print_dump_info (REPORT_DETAILS))
210 fprintf (vect_dump, "cost model: "
211 "epilogue peel iters set to vf/2 because "
212 "peeling for alignment is unknown .");
214 /* If peeled iterations are unknown, count a taken branch and a not taken
215 branch per peeled loop. Even if scalar loop iterations are known,
216 vector iterations are not known since peeled prologue iterations are
217 not known. Hence guards remain the same. */
218 peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
219 + TARG_COND_NOT_TAKEN_BRANCH_COST);
226 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
227 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
228 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
229 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
231 peel_iters_prologue = nelements - (byte_misalign / element_size);
234 peel_iters_prologue = 0;
236 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
238 peel_iters_epilogue = vf/2;
239 if (vect_print_dump_info (REPORT_DETAILS))
240 fprintf (vect_dump, "cost model: "
241 "epilogue peel iters set to vf/2 because "
242 "loop iterations are unknown .");
244 /* If peeled iterations are known but number of scalar loop
245 iterations is unknown, count a taken branch per peeled loop. */
246 peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
251 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
252 peel_iters_prologue = niters < peel_iters_prologue ?
253 niters : peel_iters_prologue;
254 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
258 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
259 + (peel_iters_epilogue * scalar_single_iter_cost)
262 /* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
263 information we provide for the target is whether testing against the
264 threshold involves a runtime test. */
265 if (targetm.vectorize.builtin_vectorization_cost)
267 bool runtime_test = false;
269 /* If the number of iterations is unknown, or the
270 peeling-for-misalignment amount is unknown, we will have to generate
271 a runtime test to test the loop count against the threshold. */
272 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
273 || (byte_misalign < 0))
276 targetm.vectorize.builtin_vectorization_cost (runtime_test);
277 if (vect_print_dump_info (REPORT_DETAILS))
278 fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
279 targetm.vectorize.builtin_vectorization_cost (runtime_test));
283 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
284 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
286 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
287 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
290 /* Calculate number of iterations required to make the vector version
291 profitable, relative to the loop bodies only. The following condition
292 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
293 SIC = scalar iteration cost, VIC = vector iteration cost,
294 VOC = vector outside cost and VF = vectorization factor. */
296 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
298 if (vec_outside_cost <= 0)
299 min_profitable_iters = 1;
302 min_profitable_iters = (vec_outside_cost * vf
303 - vec_inside_cost * peel_iters_prologue
304 - vec_inside_cost * peel_iters_epilogue)
305 / ((scalar_single_iter_cost * vf)
308 if ((scalar_single_iter_cost * vf * min_profitable_iters)
309 <= ((vec_inside_cost * min_profitable_iters)
310 + (vec_outside_cost * vf)))
311 min_profitable_iters++;
314 /* vector version will never be profitable. */
317 if (vect_print_dump_info (REPORT_DETAILS))
318 fprintf (vect_dump, "cost model: vector iteration cost = %d "
319 "is divisible by scalar iteration cost = %d by a factor "
320 "greater than or equal to the vectorization factor = %d .",
321 vec_inside_cost, scalar_single_iter_cost, vf);
325 if (vect_print_dump_info (REPORT_DETAILS))
327 fprintf (vect_dump, "Cost model analysis: \n");
328 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
330 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
332 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
333 fprintf (vect_dump, " prologue iterations: %d\n",
334 peel_iters_prologue);
335 fprintf (vect_dump, " epilogue iterations: %d\n",
336 peel_iters_epilogue);
337 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
338 min_profitable_iters);
/* Never use a threshold below the vectorization factor.  */
341 min_profitable_iters =
342 min_profitable_iters < vf ? vf : min_profitable_iters;
344 /* Because the condition we create is:
345 if (niters <= min_profitable_iters)
346 then skip the vectorized loop. */
347 min_profitable_iters--;
349 if (vect_print_dump_info (REPORT_DETAILS))
350 fprintf (vect_dump, " Profitability threshold = %d\n",
351 min_profitable_iters);
353 return min_profitable_iters;
357 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
358 functions. Design better to avoid maintenance issues. */
360 /* Function vect_model_reduction_cost.
362 Models cost for a reduction operation, including the vector ops
363 generated within the strip-mine loop, the initial definition before
364 the loop, and the epilogue code that must be generated.

The inside-of-loop cost of STMT_INFO is increased by
NCOPIES * TARG_VEC_STMT_COST; the outside-of-loop cost is overwritten with
the initial-definition cost plus, when the stmt is not nested in the
vectorized loop, the epilogue cost selected through REDUC_CODE.  */
367 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
376 enum machine_mode mode;
377 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
378 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
379 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
380 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
382 /* Cost of reduction op inside loop. */
383 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
385 reduction_op = TREE_OPERAND (operation, op_type-1);
386 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
387 mode = TYPE_MODE (vectype);
388 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
391 orig_stmt = STMT_VINFO_STMT (stmt_info);
393 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
395 /* Add in cost for initial definition. */
396 outer_cost += TARG_SCALAR_TO_VEC_COST;
398 /* Determine cost of epilogue code.
400 We have a reduction operator that will reduce the vector in one statement.
401 Also requires scalar extract. */
403 if (!nested_in_vect_loop_p (loop, orig_stmt))
405 if (reduc_code < NUM_TREE_CODES)
406 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
409 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
411 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
412 int element_bitsize = tree_low_cst (bitsize, 1);
413 int nelements = vec_size_in_bits / element_bitsize;
415 optab = optab_for_tree_code (code, vectype);
417 /* We have a whole vector shift available. */
418 if (VECTOR_MODE_P (mode)
419 && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
420 && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
421 /* Final reduction via vector shifts and the reduction operator. Also
422 requires scalar extract. */
423 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
424 + TARG_VEC_TO_SCALAR_COST);
426 /* Use extracts and reduction op for final reduction. For N elements,
427 we have N extracts and N-1 reduction ops. */
428 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
432 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
434 if (vect_print_dump_info (REPORT_DETAILS))
435 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
436 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
437 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
441 /* Function vect_model_induction_cost.
443 Models cost for induction operations.

Both cost fields of STMT_INFO are assigned (not accumulated): the
inside-of-loop cost becomes NCOPIES * TARG_VEC_STMT_COST and the
outside-of-loop cost becomes 2 * TARG_SCALAR_TO_VEC_COST.  */
446 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
448 /* loop cost for vec_loop. */
449 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
450 /* prologue cost for vec_init and vec_step. */
451 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
453 if (vect_print_dump_info (REPORT_DETAILS))
454 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
455 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
456 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
460 /* Function vect_model_simple_cost.
462 Models cost for simple operations, i.e. those that only emit ncopies of a
463 single op. Right now, this does not account for multiple insns that could
464 be generated for the single vector op. We will handle that shortly.

DT is indexed per operand; only DT[0] and DT[1] are read.  Each operand
whose def type is constant or invariant adds TARG_SCALAR_TO_VEC_COST to
the outside-of-loop cost.  The inside-of-loop cost is
NCOPIES * TARG_VEC_STMT_COST.  Both costs are recorded through the
stmt_vinfo_set_*_of_loop_cost helpers, which receive STMT_INFO and
SLP_NODE.  */
467 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
468 enum vect_def_type *dt, slp_tree slp_node)
471 int inside_cost = 0, outside_cost = 0;
473 inside_cost = ncopies * TARG_VEC_STMT_COST;
475 /* FORNOW: Assuming maximum 2 args per stmts. */
476 for (i = 0; i < 2; i++)
478 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
479 outside_cost += TARG_SCALAR_TO_VEC_COST;
482 if (vect_print_dump_info (REPORT_DETAILS))
483 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
484 "outside_cost = %d .", inside_cost, outside_cost);
486 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
487 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
488 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
492 /* Function vect_cost_strided_group_size
494 For strided load or store, return the group_size only if it is the first
495 load or store of a group, else return 1. This ensures that group size is
496 only returned once per group. */
499 vect_cost_strided_group_size (stmt_vec_info stmt_info)
501 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
/* STMT_INFO is the first access of its group exactly when the group's
   first DR is STMT_INFO's own stmt.  */
503 if (first_stmt == STMT_VINFO_STMT (stmt_info))
504 return DR_GROUP_SIZE (stmt_info);
510 /* Function vect_model_store_cost
512 Models cost for stores. In the case of strided accesses, one access
513 has the overhead of the strided access attributed to it.

A DT of vect_constant_def or vect_invariant_def sets the outside-of-loop
cost to TARG_SCALAR_TO_VEC_COST (DT is presumably the def type of the
stored operand - confirm against the callers).  The inside-of-loop cost
is the permute cost for a strided group, when applicable, plus
NCOPIES * TARG_VEC_STORE_COST.  Both costs are recorded through the
stmt_vinfo_set_*_of_loop_cost helpers, which receive STMT_INFO and
SLP_NODE.  */
516 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
517 enum vect_def_type dt, slp_tree slp_node)
520 int inside_cost = 0, outside_cost = 0;
522 if (dt == vect_constant_def || dt == vect_invariant_def)
523 outside_cost = TARG_SCALAR_TO_VEC_COST;
525 /* Strided access? */
526 if (DR_GROUP_FIRST_DR (stmt_info))
527 group_size = vect_cost_strided_group_size (stmt_info);
528 /* Not a strided access. */
532 /* Is this an access in a group of stores, which provide strided access?
533 If so, add in the cost of the permutes. */
536 /* Uses a high and low interleave operation for each needed permute. */
537 inside_cost = ncopies * exact_log2(group_size) * group_size
538 * TARG_VEC_STMT_COST;
540 if (vect_print_dump_info (REPORT_DETAILS))
541 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
546 /* Costs of the stores. */
547 inside_cost += ncopies * TARG_VEC_STORE_COST;
549 if (vect_print_dump_info (REPORT_DETAILS))
550 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
551 "outside_cost = %d .", inside_cost, outside_cost);
553 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
554 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
555 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
559 /* Function vect_model_load_cost
561 Models cost for loads. In the case of strided accesses, the last access
562 has the overhead of the strided access attributed to it. Since unaligned
563 accesses are supported for loads, we also account for the costs of the
564 access scheme chosen.

The access scheme is obtained from vect_supportable_dr_alignment on the
data reference selected below and drives the switch over the load costs.
Both costs are recorded through the stmt_vinfo_set_*_of_loop_cost
helpers, which receive STMT_INFO and SLP_NODE.
NOTE(review): the local alignment_support_cheme is presumably a
misspelling of "scheme"; renaming it is a code change and is left for a
separate patch.  */
567 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
571 int alignment_support_cheme;
573 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
574 int inside_cost = 0, outside_cost = 0;
576 /* Strided accesses? */
577 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
578 if (first_stmt && !slp_node)
580 group_size = vect_cost_strided_group_size (stmt_info);
581 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
583 /* Not a strided access. */
590 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
592 /* Is this an access in a group of loads providing strided access?
593 If so, add in the cost of the permutes. */
596 /* Uses even and odd extract operations for each needed permute. */
597 inside_cost = ncopies * exact_log2(group_size) * group_size
598 * TARG_VEC_STMT_COST;
600 if (vect_print_dump_info (REPORT_DETAILS))
601 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
606 /* The loads themselves. */
607 switch (alignment_support_cheme)
611 inside_cost += ncopies * TARG_VEC_LOAD_COST;
613 if (vect_print_dump_info (REPORT_DETAILS))
614 fprintf (vect_dump, "vect_model_load_cost: aligned.");
618 case dr_unaligned_supported:
620 /* Here, we assign an additional cost for the unaligned load. */
621 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
623 if (vect_print_dump_info (REPORT_DETAILS))
624 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
629 case dr_explicit_realign:
631 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
633 /* FIXME: If the misalignment remains fixed across the iterations of
634 the containing loop, the following cost should be added to the
636 if (targetm.vectorize.builtin_mask_for_load)
637 inside_cost += TARG_VEC_STMT_COST;
641 case dr_explicit_realign_optimized:
643 if (vect_print_dump_info (REPORT_DETAILS))
644 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
647 /* Unaligned software pipeline has a load of an address, an initial
648 load, and possibly a mask operation to "prime" the loop. However,
649 if this is an access in a group of loads, which provide strided
650 access, then the above cost should only be considered for one
651 access in the group. Inside the loop, there is a load op
652 and a realignment op. */
654 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
656 outside_cost = 2*TARG_VEC_STMT_COST;
657 if (targetm.vectorize.builtin_mask_for_load)
658 outside_cost += TARG_VEC_STMT_COST;
661 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
670 if (vect_print_dump_info (REPORT_DETAILS))
671 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
672 "outside_cost = %d .", inside_cost, outside_cost);
674 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
675 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
676 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
680 /* Function vect_get_new_vect_var.

TYPE is the type given to the new temporary and VAR_KIND selects the
prefix.  The temporary is created with create_tmp_var, named either the
prefix followed by NAME or the prefix alone; vector-typed temporaries
are additionally marked with DECL_GIMPLE_REG_P.

682 Returns a name for a new variable. The current naming scheme appends the
683 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
684 the name of vectorizer generated variables, and appends that to NAME if
688 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
695 case vect_simple_var:
698 case vect_scalar_var:
701 case vect_pointer_var:
710 char* tmp = concat (prefix, name, NULL);
711 new_vect_var = create_tmp_var (type, tmp);
715 new_vect_var = create_tmp_var (type, prefix);
717 /* Mark vector typed variable as a gimple register variable. */
718 if (TREE_CODE (type) == VECTOR_TYPE)
719 DECL_GIMPLE_REG_P (new_vect_var) = true;
725 /* Function vect_create_addr_base_for_vector_ref.
727 Create an expression that computes the address of the first memory location
728 that will be accessed for a data reference.
731 STMT: The statement containing the data reference.
732 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
733 OFFSET: Optional. If supplied, it is added to the initial address.
734 LOOP: Specify relative to which loop-nest should the address be computed.
735 For example, when the dataref is in an inner-loop nested in an
736 outer-loop that is now being vectorized, LOOP can be either the
737 outer-loop, or the inner-loop. The first memory location accessed
738 by the following dataref ('in' points to short):
745 if LOOP=i_loop: &in (relative to i_loop)
746 if LOOP=j_loop: &in+i*2B (relative to j_loop)
749 1. Return an SSA_NAME whose value is the address of the memory location of
750 the first vector of the data reference.
751 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
752 these statement(s) which define the returned SSA_NAME.
754 FORNOW: We are only handling array accesses with step 1. */
757 vect_create_addr_base_for_vector_ref (tree stmt,
762 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
763 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
764 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
765 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
767 tree data_ref_base_var;
770 tree addr_base, addr_expr;
772 tree base_offset = unshare_expr (DR_OFFSET (dr));
773 tree init = unshare_expr (DR_INIT (dr));
774 tree vect_ptr_type, addr_expr2;
775 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
778 if (loop != containing_loop)
780 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* NOTE(review): this declaration appears to shadow the LOOP tested just
   above - confirm that the shadowing is intended.  */
781 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
783 gcc_assert (nested_in_vect_loop_p (loop, stmt));
785 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
786 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
787 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
790 /* Create data_ref_base */
791 base_name = build_fold_indirect_ref (data_ref_base);
792 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
793 add_referenced_var (data_ref_base_var);
794 data_ref_base = force_gimple_operand (data_ref_base, &new_base_stmt,
795 true, data_ref_base_var);
796 append_to_statement_list_force(new_base_stmt, new_stmt_list);
798 /* Create base_offset */
799 base_offset = size_binop (PLUS_EXPR, base_offset, init);
800 base_offset = fold_convert (sizetype, base_offset);
801 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
802 add_referenced_var (dest);
803 base_offset = force_gimple_operand (base_offset, &new_stmt, true, dest);
804 append_to_statement_list_force (new_stmt, new_stmt_list);
808 tree tmp = create_tmp_var (sizetype, "offset");
810 add_referenced_var (tmp);
811 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
812 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
813 base_offset, offset);
814 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
815 append_to_statement_list_force (new_stmt, new_stmt_list);
818 /* base + base_offset */
819 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
820 data_ref_base, base_offset);
822 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
824 /* addr_expr = addr_base */
/* NOTE(review): in the lines visible here addr_expr is created and
   registered, but only addr_expr2 is handed to force_gimple_operand -
   confirm whether addr_expr is still needed.  */
825 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
826 get_name (base_name));
827 add_referenced_var (addr_expr);
828 vec_stmt = fold_convert (vect_ptr_type, addr_base);
829 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
830 get_name (base_name));
831 add_referenced_var (addr_expr2);
832 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
833 append_to_statement_list_force (new_stmt, new_stmt_list);
835 if (vect_print_dump_info (REPORT_DETAILS))
837 fprintf (vect_dump, "created ");
838 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
844 /* Function vect_create_data_ref_ptr.
846 Create a new pointer to vector type (vp), that points to the first location
847 accessed in the loop by STMT, along with the def-use update chain to
848 appropriately advance the pointer through the loop iterations. Also set
849 aliasing information for the pointer. This vector pointer is used by the
850 callers to this function to create a memory reference expression for vector
854 1. STMT: a stmt that references memory. Expected to be of the form
855 GIMPLE_MODIFY_STMT <name, data-ref> or
856 GIMPLE_MODIFY_STMT <data-ref, name>.
857 2. AT_LOOP: the loop where the vector memref is to be created.
858 3. OFFSET (optional): an offset to be added to the initial address accessed
859 by the data-ref in STMT.
860 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
861 pointing to the initial address.
862 5. TYPE: if not NULL indicates the required type of the data-ref
865 1. Declare a new ptr to vector_type, and have it point to the base of the
866 data reference (initial address accessed by the data reference).
867 For example, for vector of type V8HI, the following code is generated:
870 vp = (v8hi *)initial_address;
872 if OFFSET is not supplied:
873 initial_address = &a[init];
874 if OFFSET is supplied:
875 initial_address = &a[init + OFFSET];
877 Return the initial_address in INITIAL_ADDRESS.
879 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
880 update the pointer in each iteration of the loop.
882 Return the increment stmt that updates the pointer in PTR_INCR.
884 3. Set INV_P to true if the access pattern of the data reference in the
885 vectorized loop is invariant. Set it to false otherwise.
887 4. Return the pointer. */
890 vect_create_data_ref_ptr (tree stmt, struct loop *at_loop,
891 tree offset, tree *initial_address, tree *ptr_incr,
892 bool only_init, tree type, bool *inv_p)
895 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
896 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
897 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
898 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
899 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
900 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
906 tree new_stmt_list = NULL_TREE;
910 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
912 block_stmt_iterator incr_bsi;
914 tree indx_before_incr, indx_after_incr;
918 /* Check the step (evolution) of the load in LOOP, and record
919 whether it's invariant. */
920 if (nested_in_vect_loop)
921 step = STMT_VINFO_DR_STEP (stmt_info);
923 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
925 if (tree_int_cst_compare (step, size_zero_node) == 0)
930 /* Create an expression for the first address accessed by this load
932 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
934 if (vect_print_dump_info (REPORT_DETAILS))
936 tree data_ref_base = base_name;
937 fprintf (vect_dump, "create vector-pointer variable to type: ");
938 print_generic_expr (vect_dump, vectype, TDF_SLIM);
939 if (TREE_CODE (data_ref_base) == VAR_DECL)
940 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
941 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
942 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
943 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
944 fprintf (vect_dump, " vectorizing a record based array ref: ");
945 else if (TREE_CODE (data_ref_base) == SSA_NAME)
946 fprintf (vect_dump, " vectorizing a pointer ref: ");
947 print_generic_expr (vect_dump, base_name, TDF_SLIM);
950 /** (1) Create the new vector-pointer variable: **/
952 vect_ptr_type = build_pointer_type (type);
954 vect_ptr_type = build_pointer_type (vectype);
955 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
956 get_name (base_name));
957 add_referenced_var (vect_ptr);
959 /** (2) Add aliasing information to the new vector-pointer:
960 (The points-to info (DR_PTR_INFO) may be defined later.) **/
962 tag = DR_SYMBOL_TAG (dr);
965 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
966 tag must be created with tag added to its may alias list. */
968 new_type_alias (vect_ptr, tag, DR_REF (dr));
970 set_symbol_mem_tag (vect_ptr, tag);
972 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
974 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
975 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
976 def-use update cycles for the pointer: One relative to the outer-loop
977 (LOOP), which is what steps (3) and (4) below do. The other is relative
978 to the inner-loop (which is the inner-most loop containing the dataref),
979 and this is done by step (5) below.
981 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
982 inner-most loop, and so steps (3),(4) work the same, and step (5) is
983 redundant. Steps (3),(4) create the following:
986 LOOP: vp1 = phi(vp0,vp2)
992 If there is an inner-loop nested in loop, then step (5) will also be
993 applied, and an additional update in the inner-loop will be created:
996 LOOP: vp1 = phi(vp0,vp2)
998 inner: vp3 = phi(vp1,vp4)
999 vp4 = vp3 + inner_step
1005 /** (3) Calculate the initial address of the vector-pointer, and set
1006 the vector-pointer to point to it before the loop: **/
1008 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1010 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1012 pe = loop_preheader_edge (loop);
1013 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1014 gcc_assert (!new_bb);
1015 *initial_address = new_temp;
1017 /* Create: p = (vectype *) initial_base */
1018 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1019 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
1020 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
1021 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
1022 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1023 gcc_assert (!new_bb);
1026 /** (4) Handle the updating of the vector-pointer inside the loop.
1027 This is needed when ONLY_INIT is false, and also when AT_LOOP
1028 is the inner-loop nested in LOOP (during outer-loop vectorization).
1031 if (only_init && at_loop == loop) /* No update in loop is required. */
1033 /* Copy the points-to information if it exists. */
1034 if (DR_PTR_INFO (dr))
1035 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
1036 vptr = vect_ptr_init;
1040 /* The step of the vector pointer is the Vector Size. */
1041 tree step = TYPE_SIZE_UNIT (vectype);
1042 /* One exception to the above is when the scalar step of the load in
1043 LOOP is zero. In this case the step here is also zero. */
1045 step = size_zero_node;
1047 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1049 create_iv (vect_ptr_init,
1050 fold_convert (vect_ptr_type, step),
1051 NULL_TREE, loop, &incr_bsi, insert_after,
1052 &indx_before_incr, &indx_after_incr);
1053 incr = bsi_stmt (incr_bsi);
1054 set_stmt_info (stmt_ann (incr),
1055 new_stmt_vec_info (incr, loop_vinfo));
1057 /* Copy the points-to information if it exists. */
1058 if (DR_PTR_INFO (dr))
1060 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1061 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1063 merge_alias_info (vect_ptr_init, indx_before_incr);
1064 merge_alias_info (vect_ptr_init, indx_after_incr);
1068 vptr = indx_before_incr;
1071 if (!nested_in_vect_loop || only_init)
1075 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1076 nested in LOOP, if exists: **/
1078 gcc_assert (nested_in_vect_loop);
/* The inner-loop pointer is stepped by the data-ref's own DR_STEP in
   CONTAINING_LOOP, not by the vector size used in step (4).  */
1081 standard_iv_increment_position (containing_loop, &incr_bsi,
1083 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
1084 containing_loop, &incr_bsi, insert_after, &indx_before_incr,
1086 incr = bsi_stmt (incr_bsi);
1087 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1089 /* Copy the points-to information if it exists. */
1090 if (DR_PTR_INFO (dr))
1092 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1093 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1095 merge_alias_info (vect_ptr_init, indx_before_incr);
1096 merge_alias_info (vect_ptr_init, indx_after_incr);
1100 return indx_before_incr;
1107 /* Function bump_vector_ptr
1109 Increment a pointer (to a vector type) by vector-size. If requested,
1110 i.e. if PTR_INCR is given, then also connect the new increment stmt
1111 to the existing def-use update-chain of the pointer, by modifying
1112 the PTR_INCR as illustrated below:
1114 The pointer def-use update-chain before this function:
1115 DATAREF_PTR = phi (p_0, p_2)
1117 PTR_INCR: p_2 = DATAREF_PTR + step
1119 The pointer def-use update-chain after this function:
1120 DATAREF_PTR = phi (p_0, p_2)
1122 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1124 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1127 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
1129 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1130 the loop. The increment amount across iterations is expected
to equal the amount added here: while PTR_INCR is rewired, its use
operand that is not DATAREF_PTR is asserted to compare equal to it.
1132 BSI - location where the new update stmt is to be placed.
1133 STMT - the original scalar memory-access stmt that is being vectorized.
1134 BUMP - optional. The offset by which to bump the pointer. If not given,
1135 the offset is assumed to be vector_size.
1137 Output: Return NEW_DATAREF_PTR as illustrated above.
The new stmt is a POINTER_PLUS_EXPR assignment whose result is a fresh
SSA name of the variable underlying DATAREF_PTR; it is emitted through
vect_finish_stmt_generation.
1142 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
1143 tree stmt, tree bump)
1145 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1146 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1147 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1148 tree vptr_type = TREE_TYPE (dataref_ptr);
1149 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
1150 tree update = TYPE_SIZE_UNIT (vectype);
1153 use_operand_p use_p;
1154 tree new_dataref_ptr;
1159 incr_stmt = build_gimple_modify_stmt (ptr_var,
1160 build2 (POINTER_PLUS_EXPR, vptr_type,
1161 dataref_ptr, update));
1162 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1163 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
1164 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
1166 /* Copy the points-to information if it exists. */
1167 if (DR_PTR_INFO (dr))
1168 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1169 merge_alias_info (new_dataref_ptr, dataref_ptr);
/* NOTE(review): the return below is presumably the "no PTR_INCR given"
   exit; its guarding condition is not visible here -- confirm.  */
1172 return new_dataref_ptr;
1174 /* Update the vector-pointer's cross-iteration increment: the use of DATAREF_PTR in PTR_INCR is redirected to NEW_DATAREF_PTR. */
1175 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1177 tree use = USE_FROM_PTR (use_p);
1179 if (use == dataref_ptr)
1180 SET_USE (use_p, new_dataref_ptr);
1182 gcc_assert (tree_int_cst_compare (use, update) == 0);
1185 return new_dataref_ptr;
1189 /* Function vect_create_destination_var.
1191 Create a new temporary of type VECTYPE.
If VECTYPE is NULL, the temporary is instead created with the type of
SCALAR_DEST and kind vect_scalar_var (kind vect_simple_var is used when
VECTYPE is given). SCALAR_DEST must be an SSA_NAME; the name obtained
from it with get_name is handed to vect_get_new_vect_var, and the new
variable is registered with add_referenced_var. */
1194 vect_create_destination_var (tree scalar_dest, tree vectype)
1197 const char *new_name;
1199 enum vect_var_kind kind;
1201 kind = vectype ? vect_simple_var : vect_scalar_var;
1202 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1204 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1206 new_name = get_name (scalar_dest);
1209 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1210 add_referenced_var (vec_dest);
1216 /* Function vect_init_vector.
1218 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1219 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1220 is not NULL. Otherwise, place the initialization at the loop preheader.
1221 Return the DEF of INIT_STMT.
1222 It will be used in the vectorization of STMT.
The new variable has type VECTOR_TYPE. When the stmt is placed on the
preheader edge, the insertion is asserted not to create a new basic
block. */
1225 vect_init_vector (tree stmt, tree vector_var, tree vector_type,
1226 block_stmt_iterator *bsi)
1228 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
/* Build INIT_STMT: new_var = VECTOR_VAR, with a fresh SSA name of new_var
   as its left-hand side.  */
1236 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1237 add_referenced_var (new_var);
1238 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1239 new_temp = make_ssa_name (new_var, init_stmt);
1240 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
/* NOTE(review): per the function comment this is the "BSI is not NULL"
   placement; the test selecting it is not visible here -- confirm.  */
1243 vect_finish_stmt_generation (stmt, init_stmt, bsi);
/* Preheader placement: insert INIT_STMT on the preheader edge of the loop.  */
1246 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1247 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* NOTE(review): the statement controlled by this test is not visible here;
   presumably it selects the inner loop when STMT is nested -- confirm.  */
1249 if (nested_in_vect_loop_p (loop, stmt))
1251 pe = loop_preheader_edge (loop);
1252 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1253 gcc_assert (!new_bb);
1256 if (vect_print_dump_info (REPORT_DETAILS))
1258 fprintf (vect_dump, "created new init_stmt: ");
1259 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
/* The def of INIT_STMT is its operand 0, i.e. the SSA name created above.  */
1262 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1267 /* For constant and loop invariant defs of SLP_NODE this function returns
1268 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1269 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
stmts. The defs are appended to VEC_OPRNDS with VEC_quick_push, so the
caller supplies the allocated vector (vect_get_slp_defs does so with
VEC_alloc before calling this function).
1273 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1274 unsigned int op_num)
1276 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1277 tree stmt = VEC_index (tree, stmts, 0);
1278 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1279 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1280 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1283 int j, number_of_places_left_in_vector;
1285 tree op, vop, operation;
1286 int group_size = VEC_length (tree, stmts);
1287 unsigned int vec_num, i;
1288 int number_of_copies = 1;
1289 bool is_store = false;
1290 unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1291 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1293 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1296 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1297 created vectors. It is greater than 1 if unrolling is performed.
1299 For example, we have two scalar operands, s1 and s2 (e.g., group of
1300 strided accesses of size two), while NUNITS is four (i.e., four scalars
1301 of this type can be packed in a vector). The output vector will contain
1302 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
is least_common_multiple (4, 2) / 2 = 2 in this case.)
1305 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1306 containing the operands.
1308 For example, NUNITS is four as before, and the group size is 8
1309 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1310 {s5, s6, s7, s8}. */
1312 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1314 number_of_places_left_in_vector = nunits;
1315 for (j = 0; j < number_of_copies; j++)
/* Visit the scalar stmts from last to first. I is unsigned, so after
   index 0 it wraps around and VEC_iterate ends the loop.  */
1317 for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--)
1319 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1323 op = TREE_OPERAND (operation, op_num);
1325 /* Create 'vect_ = {op0,op1,...,opn}'. */
1326 t = tree_cons (NULL_TREE, op, t);
1328 number_of_places_left_in_vector--;
/* Once NUNITS elements have been collected, build the vector from the
   list, emit its initialization and record the resulting def.  */
1330 if (number_of_places_left_in_vector == 0)
1332 number_of_places_left_in_vector = nunits;
1334 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1335 vec_cst = build_constructor_from_list (vector_type, t);
1336 VEC_quick_push (tree, voprnds,
1337 vect_init_vector (stmt, vec_cst, vector_type,
1344 /* Since the vectors are created in the reverse order, we should invert
them while copying from VOPRNDS into VEC_OPRNDS.
1346 vec_num = VEC_length (tree, voprnds);
1347 for (j = vec_num - 1; j >= 0; j--)
1349 vop = VEC_index (tree, voprnds, j);
1350 VEC_quick_push (tree, *vec_oprnds, vop);
1353 VEC_free (tree, heap, voprnds);
1355 /* In case that VF is greater than the unrolling factor needed for the SLP
1356 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1357 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1358 to replicate the vectors. */
1359 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1361 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1362 VEC_quick_push (tree, *vec_oprnds, vop);
1367 /* Get vectorized definitions from SLP_NODE that contains corresponding
1368 vectorized def-stmts.
SLP_TREE_VEC_STMTS (SLP_NODE) must be set. For each stmt found in it,
operand 0 of the stmt (the value it defines) is pushed onto VEC_OPRNDS
with VEC_quick_push. */
1371 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1377 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1380 VEC_iterate (tree, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1383 gcc_assert (vec_def_stmt);
1384 vec_oprnd = GIMPLE_STMT_OPERAND (vec_def_stmt, 0);
1385 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1390 /* Get vectorized definitions for SLP_NODE.
1391 If the scalar definitions are loop invariants or constants, collect them and
1392 call vect_get_constant_vectors() to create vector stmts.
1393 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1394 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1395 vect_get_slp_vect_defs() to retrieve them.
VEC_OPRNDS0 is allocated here with room for
SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_NODE) defs and receives the defs of
operand 0 (from the LEFT node when there is one). VEC_OPRNDS1 is
allocated and filled the same way for operand 1 (RIGHT node).
NOTE(review): the code tests for a store group and for a unary operation
before handling VEC_OPRNDS1; presumably VEC_OPRNDS1 is left untouched in
those cases -- confirm. */
1398 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1399 VEC (tree,heap) **vec_oprnds1)
1401 tree operation, first_stmt;
1403 /* Allocate memory for vectorized defs. */
1404 *vec_oprnds0 = VEC_alloc (tree, heap,
1405 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1407 /* SLP_NODE corresponds either to a group of stores or to a group of
1408 unary/binary operations. We don't call this function for loads. */
1409 if (SLP_TREE_LEFT (slp_node))
1410 /* The defs are already vectorized. */
1411 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1413 /* Build vectors from scalar defs. */
1414 vect_get_constant_vectors (slp_node, vec_oprnds0, 0);
1416 first_stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1417 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1418 /* Since we don't call this function with loads, this is a group of
stores.
1422 operation = GIMPLE_STMT_OPERAND (first_stmt, 1);
1423 if (TREE_OPERAND_LENGTH (operation) == unary_op)
1426 *vec_oprnds1 = VEC_alloc (tree, heap,
1427 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1429 if (SLP_TREE_RIGHT (slp_node))
1430 /* The defs are already vectorized. */
1431 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1433 /* Build vectors from scalar defs. */
1434 vect_get_constant_vectors (slp_node, vec_oprnds1, 1);
1438 /* Function get_initial_def_for_induction
1441 IV_PHI - the phi node that defines the induction variable in the loop.
1442 It is the only argument of this function.
1445 Return a vector variable, initialized with the first values of
1446 the induction variable. E.g., for an iv with initial value 'X' and
1447 evolution S, for a vector of 4 units, we want to return:
1448 [X, X + S, X + 2*S, X + 3*S].
The function builds a vector phi in the header of the loop that contains
IV_PHI, together with the stmt that adds the vector of steps to it, and
records that phi in STMT_VINFO_VEC_STMT of IV_PHI. */
1451 get_initial_def_for_induction (tree iv_phi)
1453 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1454 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1456 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1457 tree vectype = get_vectype_for_scalar_type (scalar_type);
1458 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1459 edge pe = loop_preheader_edge (loop);
1460 struct loop *iv_loop;
1462 tree vec, vec_init, vec_step, t;
1467 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1468 tree init_expr, step_expr;
1469 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1472 int ncopies = vf / nunits;
1474 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1475 bool nested_in_vect_loop = false;
1477 imm_use_iterator imm_iter;
1478 use_operand_p use_p;
1482 block_stmt_iterator si;
1483 basic_block bb = bb_for_stmt (iv_phi);
1485 gcc_assert (phi_info);
1486 gcc_assert (ncopies >= 1);
1488 /* Find the first insertion point in the BB. */
1489 si = bsi_after_labels (bb);
/* Start from a zero step of the matching kind; the evolution analysis
   below is handed &step_expr and is expected to fill in the real step.  */
1491 if (INTEGRAL_TYPE_P (scalar_type))
1492 step_expr = build_int_cst (scalar_type, 0);
1494 step_expr = build_real (scalar_type, dconst0);
1496 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1497 if (nested_in_vect_loop_p (loop, iv_phi))
1499 nested_in_vect_loop = true;
1500 iv_loop = loop->inner;
1504 gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father);
/* LOOP_ARG is the value IV_PHI receives over the latch edge, i.e. the
   updated scalar induction variable.  */
1506 latch_e = loop_latch_edge (iv_loop);
1507 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
1509 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
1510 gcc_assert (access_fn);
1511 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
1512 &init_expr, &step_expr);
1514 pe = loop_preheader_edge (iv_loop);
1516 /* Create the vector that holds the initial_value of the induction. */
1517 if (nested_in_vect_loop)
1519 /* iv_loop is nested in the loop to be vectorized. init_expr had already
1520 been created during vectorization of previous stmts; We obtain it from
1521 the STMT_VINFO_VEC_STMT of the defining stmt. */
1522 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
1523 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
1527 /* iv_loop is the loop to be vectorized. Create:
1528 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1529 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1530 add_referenced_var (new_var);
1532 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1535 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1536 gcc_assert (!new_bb);
1540 t = tree_cons (NULL_TREE, init_expr, t);
1541 for (i = 1; i < nunits; i++)
1545 /* Create: new_name_i = new_name + step_expr */
1546 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1547 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1548 new_name = make_ssa_name (new_var, init_stmt);
1549 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1551 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1552 gcc_assert (!new_bb);
1554 if (vect_print_dump_info (REPORT_DETAILS))
1556 fprintf (vect_dump, "created new init_stmt: ");
1557 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1559 t = tree_cons (NULL_TREE, new_name, t);
1561 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
/* The list was built by prepending, hence the nreverse.  */
1562 vec = build_constructor_from_list (vectype, nreverse (t));
1563 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
1567 /* Create the vector that holds the step of the induction. */
1568 if (nested_in_vect_loop)
1569 /* iv_loop is nested in the loop to be vectorized. Generate:
1570 vec_step = [S, S, S, S] */
1571 new_name = step_expr;
1574 /* iv_loop is the loop to be vectorized. Generate:
1575 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1576 expr = build_int_cst (scalar_type, vf);
1577 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1581 for (i = 0; i < nunits; i++)
1582 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1583 vec = build_constructor_from_list (vectype, t);
1584 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1587 /* Create the following def-use cycle:
1592 vec_iv = PHI <vec_init, vec_loop>
1596 vec_loop = vec_iv + vec_step; */
1598 /* Create the induction-phi that defines the induction-operand. */
1599 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1600 add_referenced_var (vec_dest);
1601 induction_phi = create_phi_node (vec_dest, iv_loop->header);
1602 set_stmt_info (get_stmt_ann (induction_phi),
1603 new_stmt_vec_info (induction_phi, loop_vinfo));
1604 induc_def = PHI_RESULT (induction_phi);
1606 /* Create the iv update inside the loop */
1607 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1608 build2 (PLUS_EXPR, vectype,
1609 induc_def, vec_step));
1610 vec_def = make_ssa_name (vec_dest, new_stmt);
1611 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1612 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1613 set_stmt_info (get_stmt_ann (new_stmt),
1614 new_stmt_vec_info (new_stmt, loop_vinfo));
1616 /* Set the arguments of the phi node: */
1617 add_phi_arg (induction_phi, vec_init, pe);
1618 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
1621 /* In case that vectorization factor (VF) is bigger than the number
1622 of elements that we can fit in a vectype (nunits), we have to generate
1623 more than one vector stmt - i.e - we need to "unroll" the
1624 vector stmt by a factor VF/nunits. For more details see documentation
1625 in vectorizable_operation. */
1629 stmt_vec_info prev_stmt_vinfo;
1630 /* FORNOW. This restriction should be relaxed. */
1631 gcc_assert (!nested_in_vect_loop);
1633 /* Create the vector that holds the step between consecutive copies: [nunits*S, ..., nunits*S]. */
1634 expr = build_int_cst (scalar_type, nunits);
1635 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1637 for (i = 0; i < nunits; i++)
1638 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1639 vec = build_constructor_from_list (vectype, t);
1640 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1642 vec_def = induc_def;
1643 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1644 for (i = 1; i < ncopies; i++)
1648 /* vec_i = vec_prev + vec_step */
1649 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1650 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1651 vec_def = make_ssa_name (vec_dest, new_stmt);
1652 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1653 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1654 set_stmt_info (get_stmt_ann (new_stmt),
1655 new_stmt_vec_info (new_stmt, loop_vinfo));
/* Chain the copies through STMT_VINFO_RELATED_STMT; that is the field
   vect_get_vec_def_for_stmt_copy follows to reach the next copy.  */
1656 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1657 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1661 if (nested_in_vect_loop)
1663 /* Find the loop-closed exit-phi of the induction, and record
1664 the final vector of induction results: */
1666 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
1668 if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p))))
1670 exit_phi = USE_STMT (use_p);
1676 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
1677 /* FORNOW. Currently not supporting the case that an inner-loop induction
1678 is not used in the outer-loop (i.e. only outside the outer-loop). */
1679 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
1680 && !STMT_VINFO_LIVE_P (stmt_vinfo));
1682 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
1683 if (vect_print_dump_info (REPORT_DETAILS))
1685 fprintf (vect_dump, "vector of inductions after inner-loop:");
1686 print_generic_expr (vect_dump, new_stmt, TDF_SLIM);
1692 if (vect_print_dump_info (REPORT_DETAILS))
1694 fprintf (vect_dump, "transform induction: created def-use cycle:");
1695 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1696 fprintf (vect_dump, "\n");
1697 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
/* Record the vector phi as the vectorized form of IV_PHI.  */
1700 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1705 /* Function vect_get_vec_def_for_operand.
1707 OP is an operand in STMT. This function returns a (vector) def that will be
1708 used in the vectorized stmt for STMT.
1710 In the case that OP is an SSA_NAME which is defined in the loop, then
1711 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1713 In case OP is an invariant or constant, a new stmt that creates a vector def
1714 needs to be introduced.
OP must be a simple use according to vect_is_simple_use (asserted); the
def type reported by that call selects one of the cases below.
SCALAR_DEF receives the defining stmt in the defined-in-loop case and is
forwarded to get_initial_def_for_reduction in the reduction case.
NOTE(review): callers in this file pass NULL for SCALAR_DEF; the guard
around the store through it is not visible here -- confirm. */
1717 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1722 stmt_vec_info def_stmt_info = NULL;
1723 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1724 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1725 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1726 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1732 enum vect_def_type dt;
1736 if (vect_print_dump_info (REPORT_DETAILS))
1738 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1739 print_generic_expr (vect_dump, op, TDF_SLIM);
1742 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1743 gcc_assert (is_simple_use);
1744 if (vect_print_dump_info (REPORT_DETAILS))
1748 fprintf (vect_dump, "def = ");
1749 print_generic_expr (vect_dump, def, TDF_SLIM);
1753 fprintf (vect_dump, " def_stmt = ");
1754 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1760 /* Case 1: operand is a constant. */
1761 case vect_constant_def:
1766 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1767 if (vect_print_dump_info (REPORT_DETAILS))
1768 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
/* NUNITS copies of OP; the vector type is derived from the type of OP.  */
1770 for (i = nunits - 1; i >= 0; --i)
1772 t = tree_cons (NULL_TREE, op, t);
1774 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1775 vec_cst = build_vector (vector_type, t);
1777 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1780 /* Case 2: operand is defined outside the loop - loop invariant. */
1781 case vect_invariant_def:
1786 /* Create 'vec_inv = {inv,inv,..,inv}' */
1787 if (vect_print_dump_info (REPORT_DETAILS))
1788 fprintf (vect_dump, "Create vector_inv.");
/* NUNITS copies of DEF, built as a constructor rather than with
   build_vector as in the constant case above.  */
1790 for (i = nunits - 1; i >= 0; --i)
1792 t = tree_cons (NULL_TREE, def, t);
1795 /* FIXME: use build_constructor directly. */
1796 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1797 vec_inv = build_constructor_from_list (vector_type, t);
1798 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1801 /* Case 3: operand is defined inside the loop. */
1805 *scalar_def = def_stmt;
1807 /* Get the def from the vectorized stmt. */
1808 def_stmt_info = vinfo_for_stmt (def_stmt);
1809 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1810 gcc_assert (vec_stmt);
/* A vectorized phi yields its result; any other stmt yields operand 0.  */
1811 if (TREE_CODE (vec_stmt) == PHI_NODE)
1812 vec_oprnd = PHI_RESULT (vec_stmt);
1814 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1818 /* Case 4: operand is defined by a loop header phi - reduction */
1819 case vect_reduction_def:
1823 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1824 loop = (bb_for_stmt (def_stmt))->loop_father;
1826 /* Get the def before the loop */
1827 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1828 return get_initial_def_for_reduction (stmt, op, scalar_def);
1831 /* Case 5: operand is defined by loop-header phi - induction. */
1832 case vect_induction_def:
1834 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1836 /* Get the def from the vectorized stmt. */
1837 def_stmt_info = vinfo_for_stmt (def_stmt);
1838 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1839 gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
1840 vec_oprnd = PHI_RESULT (vec_stmt);
1850 /* Function vect_get_vec_def_for_stmt_copy
1852 Return a vector-def for an operand. This function is used when the
1853 vectorized stmt to be created (by the caller to this function) is a "copy"
1854 created in case the vectorized result cannot fit in one vector, and several
1855 copies of the vector-stmt are required. In this case the vector-def is
1856 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1857 of the stmt that defines VEC_OPRND.
1858 DT is the type of the vector def VEC_OPRND.
1861 In case the vectorization factor (VF) is bigger than the number
1862 of elements that can fit in a vectype (nunits), we have to generate
1863 more than one vector stmt to vectorize the scalar stmt. This situation
1864 arises when there are multiple data-types operated upon in the loop; the
1865 smallest data-type determines the VF, and as a result, when vectorizing
1866 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1867 vector stmt (each computing a vector of 'nunits' results, and together
1868 computing 'VF' results in each iteration). This function is called when
1869 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1870 which VF=16 and nunits=4, so the number of copies required is 4):
1872 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1874 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1875 VS1.1: vx.1 = memref1 VS1.2
1876 VS1.2: vx.2 = memref2 VS1.3
1877 VS1.3: vx.3 = memref3
1879 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1880 VSnew.1: vz1 = vx.1 + ... VSnew.2
1881 VSnew.2: vz2 = vx.2 + ... VSnew.3
1882 VSnew.3: vz3 = vx.3 + ...
1884 The vectorization of S1 is explained in vectorizable_load.
1885 The vectorization of S2:
1886 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1887 the function 'vect_get_vec_def_for_operand' is called to
1888 get the relevant vector-def for each operand of S2. For operand x it
1889 returns the vector-def 'vx.0'.
1891 To create the remaining copies of the vector-stmt (VSnew.j), this
1892 function is called to get the relevant vector-def for each operand. It is
1893 obtained from the respective VS1.j stmt, which is recorded in the
1894 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1896 For example, to obtain the vector-def 'vx.1' in order to create the
1897 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1898 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1899 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1900 and return its def ('vx.1').
1901 Overall, to create the above sequence this function will be called 3 times:
1902 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1903 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1904 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2);
When DT is vect_invariant_def or vect_constant_def there is no chain to
follow: the same def is reused for every copy. */
1907 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1909 tree vec_stmt_for_operand;
1910 stmt_vec_info def_stmt_info;
1912 /* Do nothing; can reuse same def. */
1913 if (dt == vect_invariant_def || dt == vect_constant_def )
/* Step from the stmt that defines VEC_OPRND to the copy recorded in its
   STMT_VINFO_RELATED_STMT; both the stmt info and the related stmt must
   exist. The new def is operand 0 of the related stmt.  */
1916 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1917 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1918 gcc_assert (def_stmt_info);
1919 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1920 gcc_assert (vec_stmt_for_operand);
1921 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1926 /* Get vectorized definitions for the operands to create a copy of an original
1927 stmt. See vect_get_vec_def_for_stmt_copy() for details.
The last def in VEC_OPRNDS0 is popped, advanced to the def of the next
copy using def type DT[0], and pushed back; VEC_OPRNDS1 is updated the
same way using DT[1]. */
1930 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1931 VEC(tree,heap) **vec_oprnds0,
1932 VEC(tree,heap) **vec_oprnds1)
1934 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1936 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1937 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1941 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1942 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1943 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1948 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL.
With SLP_NODE the work is delegated to vect_get_slp_defs. Otherwise
VEC_OPRNDS0 (and VEC_OPRNDS1 for OP1) is allocated with room for one
element and receives the def that vect_get_vec_def_for_operand returns
for that operand of STMT. */
1951 vect_get_vec_defs (tree op0, tree op1, tree stmt, VEC(tree,heap) **vec_oprnds0,
1952 VEC(tree,heap) **vec_oprnds1, slp_tree slp_node)
1955 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1960 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1961 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1962 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1966 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1967 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1968 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1974 /* Function vect_finish_stmt_generation.
1976 Insert a new stmt.
VEC_STMT is inserted immediately before STMT. BSI must point to STMT,
both on entry and after the insertion (asserted), and STMT must not be
a LABEL_EXPR. VEC_STMT is given a new stmt_vec_info for the loop of
STMT and the source location of STMT. */
1979 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1980 block_stmt_iterator *bsi)
1982 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1983 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1985 gcc_assert (stmt == bsi_stmt (*bsi));
1986 gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);
1988 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1990 set_stmt_info (get_stmt_ann (vec_stmt),
1991 new_stmt_vec_info (vec_stmt, loop_vinfo));
1993 if (vect_print_dump_info (REPORT_DETAILS))
1995 fprintf (vect_dump, "add new stmt: ");
1996 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
1999 /* Make sure bsi points to the stmt that is being vectorized. */
2000 gcc_assert (stmt == bsi_stmt (*bsi));
/* Copy the source location of STMT, using whichever location
   representation the build is configured for.  */
2002 #ifdef USE_MAPPED_LOCATION
2003 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
2005 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
2010 /* Function get_initial_def_for_reduction
2013 STMT - a stmt that performs a reduction operation in the loop.
2014 INIT_VAL - the initial value of the reduction variable
It must have an integral or scalar floating-point type (asserted).
2017 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2018 of the reduction (used for adjusting the epilog - see below).
It is set to NULL_TREE in the cases that need no adjustment.
2019 Return a vector variable, initialized according to the operation that STMT
2020 performs. This vector will be used as the initial value of the
2021 vector of partial results.
2023 Option1 (adjust in epilog): Initialize the vector as follows:
add: [0,0,...,0,0]
2026 min/max: [init_val,init_val,..,init_val,init_val]
2027 bit and/or: [init_val,init_val,..,init_val,init_val]
2028 and when necessary (e.g. add/mult case) let the caller know
2029 that it needs to adjust the result by init_val.
2031 Option2: Initialize the vector as follows:
2032 add: [0,0,...,0,init_val]
2033 mult: [1,1,...,1,init_val]
2034 min/max: [init_val,init_val,...,init_val]
2035 bit and/or: [init_val,init_val,...,init_val]
2036 and no adjustments are needed.
2038 For example, for the following code:
2044 STMT is 's = s + a[i]', and the reduction variable is 's'.
2045 For a vector of 4 units, we want to return either [0,0,0,init_val],
2046 or [0,0,0,0] and let the caller know that it needs to adjust
2047 the result at the end by 'init_val'.
2049 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2050 initialization vector is simpler (same element in all entries).
2051 A cost model should help decide between these two schemes. */
2054 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
2056 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2057 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2058 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2059 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2060 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2061 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2062 tree type = TREE_TYPE (init_val);
2069 bool nested_in_vect_loop = false;
2071 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
2072 if (nested_in_vect_loop_p (loop, stmt))
2073 nested_in_vect_loop = true;
2075 gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);
/* Vectorized form of INIT_VAL, obtained like any other operand of STMT.  */
2077 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
2081 case WIDEN_SUM_EXPR:
/* The adjustment handed back is the vector def when STMT is nested in the
   vectorized loop, and the scalar INIT_VAL otherwise.  */
2084 if (nested_in_vect_loop)
2085 *adjustment_def = vecdef;
2087 *adjustment_def = init_val;
2088 /* Create a vector of zeros for init_def. */
2089 if (INTEGRAL_TYPE_P (type))
2090 def_for_init = build_int_cst (type, 0);
2092 def_for_init = build_real (type, dconst0);
2093 for (i = nunits - 1; i >= 0; --i)
2094 t = tree_cons (NULL_TREE, def_for_init, t);
2095 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
2096 init_def = build_vector (vector_type, t);
/* NOTE(review): the case labels this assignment belongs to are not
   visible here; presumably the no-adjustment (e.g. min/max) cases --
   confirm.  */
2101 *adjustment_def = NULL_TREE;
2113 /* Function vect_create_epilog_for_reduction
2115 Create code at the loop-epilog to finalize the result of a reduction
2118 VECT_DEF is a vector of partial results.
2119 REDUC_CODE is the tree-code for the epilog reduction.
2120 STMT is the scalar reduction stmt that is being vectorized.
2121 REDUCTION_PHI is the phi-node that carries the reduction computation.
2124 1. Creates the reduction def-use cycle: sets the arguments for
2126 The loop-entry argument is the vectorized initial-value of the reduction.
2127 The loop-latch argument is VECT_DEF - the vector of partial sums.
2128 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2129 by applying the operation specified by REDUC_CODE if available, or by
2130 other means (whole-vector shifts or a scalar loop).
2131 The function also creates a new phi node at the loop exit to preserve
2132 loop-closed form, as illustrated below.
2134 The flow at the entry to this function:
2137 vec_def = phi <null, null> # REDUCTION_PHI
2138 VECT_DEF = vector_stmt # vectorized form of STMT
2139 s_loop = scalar_stmt # (scalar) STMT
2141 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2145 The above is transformed by this function into:
2148 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2149 VECT_DEF = vector_stmt # vectorized form of STMT
2150 s_loop = scalar_stmt # (scalar) STMT
2152 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2153 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2154 v_out2 = reduce <v_out1>
2155 s_out3 = extract_field <v_out2, 0>
2156 s_out4 = adjust_result <s_out3>
2162 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
2163 enum tree_code reduc_code, tree reduction_phi)
2165 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2167 enum machine_mode mode;
2168 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2169 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2170 basic_block exit_bb;
2174 block_stmt_iterator exit_bsi;
2176 tree new_temp = NULL_TREE;
2178 tree epilog_stmt = NULL_TREE;
2179 tree new_scalar_dest, exit_phi, new_dest;
2180 tree bitsize, bitpos, bytesize;
2181 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2182 tree adjustment_def;
2183 tree vec_initial_def;
2185 imm_use_iterator imm_iter;
2186 use_operand_p use_p;
2187 bool extract_scalar_result = false;
2188 tree reduction_op, expr;
2191 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
2192 bool nested_in_vect_loop = false;
2194 VEC(tree,heap) *phis = NULL;
/* Overview: (1) give REDUCTION_PHI its preheader and latch arguments;
(2) unless STMT is nested in the loop being vectorized, reduce the vector of
partial results to a scalar in the loop-exit block; (3) combine the result
with ADJUSTMENT_DEF when there is one; (4) redirect the users of the original
loop-exit phi(s) of the scalar result to the new result. */
2197 if (nested_in_vect_loop_p (loop, stmt))
2200 nested_in_vect_loop = true;
/* The reduction variable is the last operand of the operation; the vector
type and mode used by the epilog are derived from its scalar type. */
2203 op_type = TREE_OPERAND_LENGTH (operation);
2204 reduction_op = TREE_OPERAND (operation, op_type-1);
2205 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2206 mode = TYPE_MODE (vectype);
2208 /*** 1. Create the reduction def-use cycle ***/
2210 /* 1.1 set the loop-entry arg of the reduction-phi: */
2211 /* For the case of reduction, vect_get_vec_def_for_operand returns
2212 the scalar def before the loop, that defines the initial value
2213 of the reduction variable. */
2214 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
2216 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
2218 /* 1.2 set the loop-latch arg for the reduction-phi: */
2219 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
2221 if (vect_print_dump_info (REPORT_DETAILS))
2223 fprintf (vect_dump, "transform reduction: created def-use cycle:");
2224 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
2225 fprintf (vect_dump, "\n");
2226 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
2230 /*** 2. Create epilog code
2231 The reduction epilog code operates across the elements of the vector
2232 of partial results computed by the vectorized loop.
2233 The reduction epilog code consists of:
2234 step 1: compute the scalar result in a vector (v_out2)
2235 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2236 step 3: adjust the scalar result (s_out3) if needed.
2238 Step 1 can be accomplished using one of the following three schemes:
2239 (scheme 1) using reduc_code, if available.
2240 (scheme 2) using whole-vector shifts, if available.
2241 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2244 The overall epilog code looks like this:
2246 s_out0 = phi <s_loop> # original EXIT_PHI
2247 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2248 v_out2 = reduce <v_out1> # step 1
2249 s_out3 = extract_field <v_out2, 0> # step 2
2250 s_out4 = adjust_result <s_out3> # step 3
2252 (step 3 is optional, and steps 1 and 2 may be combined).
2253 Lastly, the uses of s_out0 are replaced by s_out4.
2257 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2258 v_out1 = phi <v_loop> */
2260 exit_bb = single_exit (loop)->dest;
2261 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
2262 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
2263 exit_bsi = bsi_after_labels (exit_bb);
2265 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2266 (i.e. when reduc_code is not available) and in the final adjustment
2267 code (if needed). Also get the original scalar reduction variable as
2268 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
2269 represents a reduction pattern), the tree-code and scalar-def are
2270 taken from the original stmt that the pattern-stmt (STMT) replaces.
2271 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2272 are taken from STMT. */
2274 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2277 /* Regular reduction */
2282 /* Reduction pattern */
2283 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
2284 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2285 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2287 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2288 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
2289 scalar_type = TREE_TYPE (scalar_dest);
2290 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
2291 bitsize = TYPE_SIZE (scalar_type);
2292 bytesize = TYPE_SIZE_UNIT (scalar_type);
2295 /* In case this is a reduction in an inner-loop while vectorizing an outer
2296 loop - we don't need to extract a single scalar result at the end of the
2297 inner-loop. The final vector of partial results will be used in the
2298 vectorized outer-loop, or reduced to a scalar result at the end of the
2300 if (nested_in_vect_loop)
2301 goto vect_finalize_reduction;
2303 /* 2.3 Create the reduction code, using one of the three schemes described
2306 if (reduc_code < NUM_TREE_CODES)
2310 /*** Case 1: Create:
2311 v_out2 = reduc_expr <v_out1> */
2313 if (vect_print_dump_info (REPORT_DETAILS))
2314 fprintf (vect_dump, "Reduce using direct vector reduction.");
2316 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2317 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
2318 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2319 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2320 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2321 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2323 extract_scalar_result = true;
2327 enum tree_code shift_code = 0;
2328 bool have_whole_vector_shift = true;
2330 int element_bitsize = tree_low_cst (bitsize, 1);
2331 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
/* The shift-based scheme requires a vec_shr pattern for MODE. */
2334 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
2335 shift_code = VEC_RSHIFT_EXPR;
2337 have_whole_vector_shift = false;
2339 /* Regardless of whether we have a whole vector shift, if we're
2340 emulating the operation via tree-vect-generic, we don't want
2341 to use it. Only the first round of the reduction is likely
2342 to still be profitable via emulation. */
2343 /* ??? It might be better to emit a reduction tree code here, so that
2344 tree-vect-generic can expand the first round via bit tricks. */
2345 if (!VECTOR_MODE_P (mode))
2346 have_whole_vector_shift = false;
2349 optab optab = optab_for_tree_code (code, vectype);
2350 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
2351 have_whole_vector_shift = false;
2354 if (have_whole_vector_shift)
2356 /*** Case 2: Create:
2357 for (offset = VS/2; offset >= element_size; offset/=2)
2359 Create: va' = vec_shift <va, offset>
2360 Create: va = vop <va, va'>
2363 if (vect_print_dump_info (REPORT_DETAILS))
2364 fprintf (vect_dump, "Reduce using vector shifts");
2366 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2367 new_temp = PHI_RESULT (new_phi);
2369 for (bit_offset = vec_size_in_bits/2;
2370 bit_offset >= element_bitsize;
2373 tree bitpos = size_int (bit_offset);
2374 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
2375 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2376 new_name = make_ssa_name (vec_dest, epilog_stmt);
2377 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2378 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2380 tmp = build2 (code, vectype, new_name, new_temp);
2381 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2382 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2383 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2384 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2387 extract_scalar_result = true;
2393 /*** Case 3: Create:
2394 s = extract_field <v_out2, 0>
2395 for (offset = element_size;
2396 offset < vector_size;
2397 offset += element_size;)
2399 Create: s' = extract_field <v_out2, offset>
2400 Create: s = op <s, s'>
2403 if (vect_print_dump_info (REPORT_DETAILS))
2404 fprintf (vect_dump, "Reduce using scalar code. ");
2406 vec_temp = PHI_RESULT (new_phi);
2407 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2408 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2410 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2411 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2412 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2413 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2414 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2416 for (bit_offset = element_bitsize;
2417 bit_offset < vec_size_in_bits;
2418 bit_offset += element_bitsize)
2421 tree bitpos = bitsize_int (bit_offset);
2422 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2425 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2426 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2427 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
2428 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2429 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2431 tmp = build2 (code, scalar_type, new_name, new_temp);
2432 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
2433 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2434 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2435 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2438 extract_scalar_result = false;
2442 /* 2.4 Extract the final scalar result. Create:
2443 s_out3 = extract_field <v_out2, bitpos> */
2445 if (extract_scalar_result)
2449 gcc_assert (!nested_in_vect_loop);
2450 if (vect_print_dump_info (REPORT_DETAILS))
2451 fprintf (vect_dump, "extract scalar result");
/* Read the last vector element on big-endian targets, element 0 otherwise. */
2453 if (BYTES_BIG_ENDIAN)
2454 bitpos = size_binop (MULT_EXPR,
2455 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
2456 TYPE_SIZE (scalar_type));
2458 bitpos = bitsize_zero_node;
2460 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
2461 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2462 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2463 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2464 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2465 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2468 vect_finalize_reduction:
2470 /* 2.5 Adjust the final result by the initial value of the reduction
2471 variable. (When such adjustment is not needed, then
2472 'adjustment_def' is zero). For example, if code is PLUS we create:
2473 new_temp = loop_exit_def + adjustment_def */
/* A nested reduction is adjusted in vector form, directly on the new
loop-exit phi; otherwise the adjustment is applied to the scalar result. */
2477 if (nested_in_vect_loop)
2479 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
2480 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
2481 new_dest = vect_create_destination_var (scalar_dest, vectype);
2485 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
2486 expr = build2 (code, scalar_type, new_temp, adjustment_def);
2487 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
2489 epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
2490 new_temp = make_ssa_name (new_dest, epilog_stmt);
2491 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2492 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2496 /* 2.6 Handle the loop-exit phi */
2498 /* Replace uses of s_out0 with uses of the final result (s_out3, or s_out4
2499 when an adjustment was applied): find the loop-closed-use at the loop exit of the original scalar result.
2500 (The reduction result is expected to have two immediate uses - one at the
2501 latch block, and one at the loop exit). */
/* NOTE(review): the vector is allocated with room for 10 entries and filled
with VEC_quick_push, which presumably does not grow it; confirm that
SCALAR_DEST cannot have more than 10 uses outside LOOP, or use a growing
push. */
2502 phis = VEC_alloc (tree, heap, 10);
2503 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
2505 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
2507 exit_phi = USE_STMT (use_p);
2508 VEC_quick_push (tree, phis, exit_phi);
2511 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
2512 gcc_assert (!VEC_empty (tree, phis));
2514 for (i = 0; VEC_iterate (tree, phis, i, exit_phi); i++)
2516 if (nested_in_vect_loop)
2518 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
2520 /* FORNOW. Currently not supporting the case that an inner-loop reduction
2521 is not used in the outer-loop (but only outside the outer-loop). */
2522 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2523 && !STMT_VINFO_LIVE_P (stmt_vinfo));
/* Record as the vectorized form of the exit phi either the adjustment stmt,
when there is an adjustment, or the new vector loop-exit phi. */
2525 epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
2526 STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
2527 set_stmt_info (get_stmt_ann (epilog_stmt),
2528 new_stmt_vec_info (epilog_stmt, loop_vinfo));
2532 /* Replace the uses: */
2533 orig_name = PHI_RESULT (exit_phi);
2534 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
2535 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2536 SET_USE (use_p, new_temp);
2538 VEC_free (tree, heap, phis);
2542 /* Function vectorizable_reduction.
2544 Check if STMT performs a reduction operation that can be vectorized.
2545 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2546 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2547 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2549 This function also handles reduction idioms (patterns) that have been
2550 recognized in advance during vect_pattern_recog. In this case, STMT may be
2552 X = pattern_expr (arg0, arg1, ..., X)
2553 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
2554 sequence that had been detected and replaced by the pattern-stmt (STMT).
2556 In some cases of reduction patterns, the type of the reduction variable X is
2557 different than the type of the other arguments of STMT.
2558 In such cases, the vectype that is used when transforming STMT into a vector
2559 stmt is different than the vectype that is used to determine the
2560 vectorization factor, because it consists of a different number of elements
2561 than the actual number of elements that are being operated upon in parallel.
2563 For example, consider an accumulation of shorts into an int accumulator.
2564 On some targets it's possible to vectorize this pattern operating on 8
2565 shorts at a time (hence, the vectype for purposes of determining the
2566 vectorization factor should be V8HI); on the other hand, the vectype that
2567 is used to create the vector form is actually V4SI (the type of the result).
2569 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2570 indicates what is the actual level of parallelism (V8HI in the example), so
2571 that the right vectorization factor would be derived. This vectype
2572 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2573 be used to create the vectorized stmt. The right vectype for the vectorized
2574 stmt is obtained from the type of the result X:
2575 get_vectype_for_scalar_type (TREE_TYPE (X))
2577 This means that, contrary to "regular" reductions (or "regular" stmts in
2578 general), the following equation:
2579 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2580 does *NOT* necessarily hold for reduction patterns. */
2583 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2588 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2589 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2590 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2591 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2592 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2594 enum tree_code code, orig_code, epilog_reduc_code = 0;
2595 enum machine_mode vec_mode;
2597 optab optab, reduc_optab;
2598 tree new_temp = NULL_TREE;
2600 enum vect_def_type dt;
2605 stmt_vec_info orig_stmt_info;
2606 tree expr = NULL_TREE;
2608 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2609 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2610 stmt_vec_info prev_stmt_info;
2612 tree new_stmt = NULL_TREE;
/* Outline: steps 1-4 below only analyze STMT. When VEC_STMT is not passed,
the analysis ends by recording the stmt type and the reduction cost. The
code after that point emits NCOPIES vector stmts fed by a new reduction phi
in the loop header, and then the reduction epilog. */
2615 if (nested_in_vect_loop_p (loop, stmt))
2618 /* FORNOW. This restriction should be relaxed. */
2621 if (vect_print_dump_info (REPORT_DETAILS))
2622 fprintf (vect_dump, "multiple types in nested loop.");
2627 gcc_assert (ncopies >= 1);
2629 /* FORNOW: SLP not supported. */
2630 if (STMT_SLP_TYPE (stmt_info))
2633 /* 1. Is vectorizable reduction? */
2635 /* Not supportable if the reduction variable is used in the loop. */
2636 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
2639 /* Reductions that are not used even in an enclosing outer-loop,
2640 are expected to be "live" (used out of the loop). */
2641 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
2642 && !STMT_VINFO_LIVE_P (stmt_info))
2645 /* Make sure it was already recognized as a reduction computation. */
2646 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2649 /* 2. Has this been recognized as a reduction pattern?
2651 Check if STMT represents a pattern that has been recognized
2652 in earlier analysis stages. For stmts that represent a pattern,
2653 the STMT_VINFO_RELATED_STMT field records the last stmt in
2654 the original sequence that constitutes the pattern. */
2656 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2659 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2660 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2661 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2662 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2665 /* 3. Check the operands of the operation. The first operands are defined
2666 inside the loop body. The last operand is the reduction variable,
2667 which is defined by the loop-header-phi. */
2669 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2671 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2672 code = TREE_CODE (operation);
2673 op_type = TREE_OPERAND_LENGTH (operation);
2674 if (op_type != binary_op && op_type != ternary_op)
2676 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2677 scalar_type = TREE_TYPE (scalar_dest);
2679 /* All uses but the last are expected to be defined in the loop.
2680 The last use is the reduction variable. */
2681 for (i = 0; i < op_type-1; i++)
2683 op = TREE_OPERAND (operation, i);
2684 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2685 gcc_assert (is_simple_use);
2686 if (dt != vect_loop_def
2687 && dt != vect_invariant_def
2688 && dt != vect_constant_def
2689 && dt != vect_induction_def)
/* After the loop I indexes the last operand, i.e. the reduction variable. */
2693 op = TREE_OPERAND (operation, i);
2694 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2695 gcc_assert (is_simple_use);
2696 gcc_assert (dt == vect_reduction_def);
2697 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
/* The reduction stmt found from the phi must be the one being vectorized:
ORIG_STMT when STMT is a pattern stmt, STMT itself otherwise. */
2699 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2701 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2703 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2706 /* 4. Supportable by target? */
2708 /* 4.1. check support for the operation in the loop */
2709 optab = optab_for_tree_code (code, vectype);
2712 if (vect_print_dump_info (REPORT_DETAILS))
2713 fprintf (vect_dump, "no optab.");
2716 vec_mode = TYPE_MODE (vectype);
2717 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
2719 if (vect_print_dump_info (REPORT_DETAILS))
2720 fprintf (vect_dump, "op not supported by target.");
2721 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2722 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2723 < vect_min_worthwhile_factor (code))
2725 if (vect_print_dump_info (REPORT_DETAILS))
2726 fprintf (vect_dump, "proceeding using word mode.");
2729 /* Worthwhile without SIMD support? */
2730 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2731 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2732 < vect_min_worthwhile_factor (code))
2734 if (vect_print_dump_info (REPORT_DETAILS))
2735 fprintf (vect_dump, "not worthwhile without SIMD support.");
2739 /* 4.2. Check support for the epilog operation.
2741 If STMT represents a reduction pattern, then the type of the
2742 reduction variable may be different than the type of the rest
2743 of the arguments. For example, consider the case of accumulation
2744 of shorts into an int accumulator; The original code:
2745 S1: int_a = (int) short_a;
2746 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2749 STMT: int_acc = widen_sum <short_a, int_acc>
2752 1. The tree-code that is used to create the vector operation in the
2753 epilog code (that reduces the partial results) is not the
2754 tree-code of STMT, but is rather the tree-code of the original
2755 stmt from the pattern that STMT is replacing. I.e, in the example
2756 above we want to use 'widen_sum' in the loop, but 'plus' in the
2758 2. The type (mode) we use to check available target support
2759 for the vector operation to be created in the *epilog*, is
2760 determined by the type of the reduction variable (in the example
2761 above we'd check this: plus_optab[vect_int_mode]).
2762 However the type (mode) we use to check available target support
2763 for the vector operation to be created *inside the loop*, is
2764 determined by the type of the other arguments to STMT (in the
2765 example we'd check this: widen_sum_optab[vect_short_mode]).
2767 This is contrary to "regular" reductions, in which the types of all
2768 the arguments are the same as the type of the reduction variable.
2769 For "regular" reductions we can therefore use the same vector type
2770 (and also the same tree-code) when generating the epilog code and
2771 when generating the code inside the loop. */
2775 /* This is a reduction pattern: get the vectype from the type of the
2776 reduction variable, and get the tree-code from orig_stmt. */
2777 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2778 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2779 vec_mode = TYPE_MODE (vectype);
2783 /* Regular reduction: the same vectype and tree-code as used for
2784 the vector code inside the loop can be used for the epilog code. */
2788 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2790 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2793 if (vect_print_dump_info (REPORT_DETAILS))
2794 fprintf (vect_dump, "no optab for reduction.");
/* NUM_TREE_CODES stands for "no direct reduction code available"; the epilog
generator only uses REDUC_CODE when it is below NUM_TREE_CODES. */
2795 epilog_reduc_code = NUM_TREE_CODES;
2797 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
2799 if (vect_print_dump_info (REPORT_DETAILS))
2800 fprintf (vect_dump, "reduc op not supported by target.");
2801 epilog_reduc_code = NUM_TREE_CODES;
2804 if (!vec_stmt) /* transformation not required. */
2806 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2807 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2813 if (vect_print_dump_info (REPORT_DETAILS))
2814 fprintf (vect_dump, "transform reduction.");
2816 /* Create the destination vector */
2817 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2819 /* Create the reduction-phi that defines the reduction-operand. */
2820 new_phi = create_phi_node (vec_dest, loop->header);
2822 /* In case the vectorization factor (VF) is bigger than the number
2823 of elements that we can fit in a vectype (nunits), we have to generate
2824 more than one vector stmt - i.e - we need to "unroll" the
2825 vector stmt by a factor VF/nunits. For more details see documentation
2826 in vectorizable_operation. */
2828 prev_stmt_info = NULL;
2829 for (j = 0; j < ncopies; j++)
2834 op = TREE_OPERAND (operation, 0);
2835 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2836 if (op_type == ternary_op)
2838 op = TREE_OPERAND (operation, 1);
2839 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2842 /* Get the vector def for the reduction variable from the phi node */
2843 reduc_def = PHI_RESULT (new_phi);
2847 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2848 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2849 if (op_type == ternary_op)
2850 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2852 /* Get the vector def for the reduction variable from the vectorized
2853 reduction operation generated in the previous iteration (j-1) */
2854 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2857 /* Arguments are ready. create the new vector stmt. */
2858 if (op_type == binary_op)
2859 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2861 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2863 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2864 new_temp = make_ssa_name (vec_dest, new_stmt);
2865 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2866 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2869 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2871 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2872 prev_stmt_info = vinfo_for_stmt (new_stmt);
2875 /* Finalize the reduction-phi (set its arguments) and create the
2876 epilog reduction code. */
2877 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2881 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2882 a function declaration if the target has a vectorized version
2883 of the function, or NULL_TREE if the function cannot be vectorized. */
2886 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2888 tree fndecl = get_callee_fndecl (call);
2889 enum built_in_function code;
2891 /* We only handle functions that do not read or clobber memory -- i.e.
2892 const or novops ones. */
2893 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2897 || TREE_CODE (fndecl) != FUNCTION_DECL
2898 || !DECL_BUILT_IN (fndecl))
/* The callee is a built-in FUNCTION_DECL here. The target hook is keyed on
the built-in function code, and its answer for the given output and input
vector types is returned as is. */
2901 code = DECL_FUNCTION_CODE (fndecl);
2902 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2906 /* Function vectorizable_call.
2908 Check if STMT performs a function call that can be vectorized.
2909 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2910 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2911 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2914 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2920 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2921 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2922 tree vectype_out, vectype_in;
2925 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2926 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2927 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2928 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2930 int ncopies, j, nargs;
2931 call_expr_arg_iterator iter;
2933 enum { NARROW, NONE, WIDEN } modifier;
2935 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2938 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2941 /* FORNOW: SLP not supported. */
2942 if (STMT_SLP_TYPE (stmt_info))
2945 /* FORNOW: not yet supported. */
2946 if (STMT_VINFO_LIVE_P (stmt_info))
2948 if (vect_print_dump_info (REPORT_DETAILS))
2949 fprintf (vect_dump, "value used after loop.");
2953 /* Is STMT a vectorizable call? */
2954 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2957 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2960 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2961 if (TREE_CODE (operation) != CALL_EXPR)
2964 /* Process function arguments. */
2965 rhs_type = NULL_TREE;
2967 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2969 /* Bail out if the function has more than two arguments, we
2970 do not have interesting builtin functions to vectorize with
2971 more than two arguments. */
2975 /* We can only handle calls with arguments of the same type. */
2977 && rhs_type != TREE_TYPE (op))
2979 if (vect_print_dump_info (REPORT_DETAILS))
2980 fprintf (vect_dump, "argument types differ.");
2983 rhs_type = TREE_TYPE (op);
2985 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
2987 if (vect_print_dump_info (REPORT_DETAILS))
2988 fprintf (vect_dump, "use not simple.");
2995 /* No arguments is also not good. */
2999 vectype_in = get_vectype_for_scalar_type (rhs_type);
3000 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3002 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
3003 vectype_out = get_vectype_for_scalar_type (lhs_type);
3004 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3007 if (nunits_in == nunits_out / 2)
3009 else if (nunits_out == nunits_in)
3011 else if (nunits_out == nunits_in / 2)
3016 /* For now, we only vectorize functions if a target specific builtin
3017 is available. TODO -- in some cases, it might be profitable to
3018 insert the calls for pieces of the vector, in order to be able
3019 to vectorize other operations in the loop. */
3020 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
3021 if (fndecl == NULL_TREE)
3023 if (vect_print_dump_info (REPORT_DETAILS))
3024 fprintf (vect_dump, "function is not vectorizable.");
3029 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
3031 if (modifier == NARROW)
3032 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3034 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3036 /* Sanity check: make sure that at least one copy of the vectorized stmt
3037 needs to be generated. */
3038 gcc_assert (ncopies >= 1);
3040 /* FORNOW. This restriction should be relaxed. */
3041 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3043 if (vect_print_dump_info (REPORT_DETAILS))
3044 fprintf (vect_dump, "multiple types in nested loop.");
3048 if (!vec_stmt) /* transformation not required. */
3050 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3051 if (vect_print_dump_info (REPORT_DETAILS))
3052 fprintf (vect_dump, "=== vectorizable_call ===");
3053 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3059 if (vect_print_dump_info (REPORT_DETAILS))
3060 fprintf (vect_dump, "transform operation.");
3062 /* FORNOW. This restriction should be relaxed. */
3063 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3065 if (vect_print_dump_info (REPORT_DETAILS))
3066 fprintf (vect_dump, "multiple types in nested loop.");
3071 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3072 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3074 prev_stmt_info = NULL;
3078 for (j = 0; j < ncopies; ++j)
3080 /* Build argument list for the vectorized call. */
3081 /* FIXME: Rewrite this so that it doesn't
3082 construct a temporary list. */
3085 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3089 = vect_get_vec_def_for_operand (op, stmt, NULL);
3092 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3094 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3098 vargs = nreverse (vargs);
3100 rhs = build_function_call_expr (fndecl, vargs);
3101 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3102 new_temp = make_ssa_name (vec_dest, new_stmt);
3103 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3105 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3108 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3110 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3112 prev_stmt_info = vinfo_for_stmt (new_stmt);
3118 for (j = 0; j < ncopies; ++j)
3120 /* Build argument list for the vectorized call. */
3121 /* FIXME: Rewrite this so that it doesn't
3122 construct a temporary list. */
3125 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3130 = vect_get_vec_def_for_operand (op, stmt, NULL);
3132 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3137 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
3139 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3142 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3143 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
3147 vargs = nreverse (vargs);
3149 rhs = build_function_call_expr (fndecl, vargs);
3150 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3151 new_temp = make_ssa_name (vec_dest, new_stmt);
3152 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3154 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3157 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3159 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3161 prev_stmt_info = vinfo_for_stmt (new_stmt);
3164 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3169 /* No current target implements this case. */
3173 /* The call in STMT might prevent it from being removed in dce.
3174 We however cannot remove it here, due to the way the ssa name
3175 it defines is mapped to the new definition. So just replace
3176 rhs of the statement with something harmless. */
3177 type = TREE_TYPE (scalar_dest);
3178 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
3185 /* Function vect_gen_widened_results_half
3187 Create a vector stmt whose code, type, number of arguments, and result
3188 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
3189 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3190 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3191 needs to be created (DECL is a function-decl of a target-builtin).
3192 STMT is the original scalar stmt that we are vectorizing. */
3195 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
3196 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3197 tree vec_dest, block_stmt_iterator *bsi,
3206 /* Generate half of the widened result: */
3207 if (code == CALL_EXPR)
3209 /* Target specific support */
/* A binary operation passes both vector operands to DECL; the other
   call passes VEC_OPRND0 only. */
3210 if (op_type == binary_op)
3211 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
3213 expr = build_call_expr (decl, 1, vec_oprnd0);
3217 /* Generic support */
/* The operand count of CODE has to agree with OP_TYPE before a tree
   expression is built directly from CODE and VECTYPE. */
3218 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3219 if (op_type == binary_op)
3220 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
3222 expr = build1 (code, vectype, vec_oprnd0);
/* Wrap EXPR in an assignment to VEC_DEST, replace the left-hand side
   with a fresh SSA name made from VEC_DEST, and hand the stmt to
   vect_finish_stmt_generation together with STMT and BSI. */
3224 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3225 new_temp = make_ssa_name (vec_dest, new_stmt);
3226 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3227 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* For a call, walk all virtual operands of the new stmt and mark each
   for renaming; an SSA_NAME operand is first replaced by its
   underlying variable. */
3229 if (code == CALL_EXPR)
3231 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
3233 if (TREE_CODE (sym) == SSA_NAME)
3234 sym = SSA_NAME_VAR (sym);
3235 mark_sym_for_renaming (sym);
3243 /* Check if STMT performs a conversion operation, that can be vectorized.
3244 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3245 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3246 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3249 vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
3250 tree *vec_stmt, slp_tree slp_node)
3256 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3257 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3258 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3259 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3260 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3261 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3264 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3265 tree new_stmt = NULL_TREE;
3266 stmt_vec_info prev_stmt_info;
3269 tree vectype_out, vectype_in;
3272 tree rhs_type, lhs_type;
3274 enum { NARROW, NONE, WIDEN } modifier;
3276 VEC(tree,heap) *vec_oprnds0 = NULL;
3279 /* Is STMT a vectorizable conversion? */
3281 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3284 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3287 if (STMT_VINFO_LIVE_P (stmt_info))
3289 /* FORNOW: not yet supported. */
3290 if (vect_print_dump_info (REPORT_DETAILS))
3291 fprintf (vect_dump, "value used after loop.");
3295 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3298 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3301 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3302 code = TREE_CODE (operation);
/* Test whether CODE is one of the two conversion codes handled here,
   FIX_TRUNC_EXPR or FLOAT_EXPR. */
3303 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3306 /* Check types of lhs and rhs. */
/* VECTYPE_IN is derived from the type of operand 0 and VECTYPE_OUT from
   the type of the scalar destination; NUNITS_IN and NUNITS_OUT are
   their element counts. */
3307 op0 = TREE_OPERAND (operation, 0);
3308 rhs_type = TREE_TYPE (op0);
3309 vectype_in = get_vectype_for_scalar_type (rhs_type);
3310 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3312 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3313 lhs_type = TREE_TYPE (scalar_dest);
3314 vectype_out = get_vectype_for_scalar_type (lhs_type);
3315 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Distinguish three relations between the element counts: the input
   vector has half as many elements as the output vector, the same
   number, or twice as many. */
3318 if (nunits_in == nunits_out / 2)
3320 else if (nunits_out == nunits_in)
3322 else if (nunits_out == nunits_in / 2)
/* For MODIFIER NONE the vectype recorded for the stmt is expected to be
   the output vector type. */
3327 if (modifier == NONE)
3328 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
3330 /* Bail out if the types are both integral or non-integral. */
3331 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
3332 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
/* NCOPIES is the vectorization factor divided by a vector element
   count: NUNITS_OUT for NARROW, NUNITS_IN in the other assignment. */
3335 if (modifier == NARROW)
3336 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3338 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3340 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3341 this, so we can safely override NCOPIES with 1 here. */
3345 /* Sanity check: make sure that at least one copy of the vectorized stmt
3346 needs to be generated. */
3347 gcc_assert (ncopies >= 1);
3349 /* FORNOW. This restriction should be relaxed. */
3350 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3352 if (vect_print_dump_info (REPORT_DETAILS))
3353 fprintf (vect_dump, "multiple types in nested loop.");
3357 /* Check the operands of the operation. */
3358 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3360 if (vect_print_dump_info (REPORT_DETAILS))
3361 fprintf (vect_dump, "use not simple.");
3365 /* Supportable by target? */
/* MODIFIER selects the query: the target's builtin_conversion hook for
   NONE, supportable_widening_operation for WIDEN, and
   supportable_narrowing_operation for NARROW. */
3366 if ((modifier == NONE
3367 && !targetm.vectorize.builtin_conversion (code, vectype_in))
3368 || (modifier == WIDEN
3369 && !supportable_widening_operation (code, stmt, vectype_in,
3372 || (modifier == NARROW
3373 && !supportable_narrowing_operation (code, stmt, vectype_in,
3376 if (vect_print_dump_info (REPORT_DETAILS))
3377 fprintf (vect_dump, "op not supported by target.");
/* For WIDEN and NARROW the vectype recorded for the stmt becomes the
   input vector type. */
3381 if (modifier != NONE)
3383 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3384 /* FORNOW: SLP not supported. */
3385 if (STMT_SLP_TYPE (stmt_info))
/* Analysis only (no VEC_STMT supplied): record that this stmt is
   handled as a type conversion. */
3389 if (!vec_stmt) /* transformation not required. */
3391 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3396 if (vect_print_dump_info (REPORT_DETAILS))
3397 fprintf (vect_dump, "transform conversion.");
/* Create the vector destination variable from SCALAR_DEST and
   VECTYPE_OUT. */
3400 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
/* For MODIFIER NONE without an SLP node, allocate an operand vector
   with room for one element. */
3402 if (modifier == NONE && !slp_node)
3403 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3405 prev_stmt_info = NULL;
/* One iteration per copy: the vector defs of OP0 are placed in
   VEC_OPRNDS0, the target's builtin_conversion hook is queried for
   CODE and VECTYPE_IN, and a call to BUILTIN_DECL is built for each
   def. */
3409 for (j = 0; j < ncopies; j++)
3415 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
3417 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3420 targetm.vectorize.builtin_conversion (code, vectype_in);
3421 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3423 new_stmt = build_call_expr (builtin_decl, 1, vop0);
3425 /* Arguments are ready. Create the new vector stmt. */
3426 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
3427 new_temp = make_ssa_name (vec_dest, new_stmt);
3428 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3429 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Mark every virtual operand of the new call for renaming, mapping an
   SSA_NAME to its underlying variable first. */
3430 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
3431 SSA_OP_ALL_VIRTUALS)
3433 if (TREE_CODE (sym) == SSA_NAME)
3434 sym = SSA_NAME_VAR (sym);
3435 mark_sym_for_renaming (sym);
3438 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Record the new stmt: one assignment stores it in both
   STMT_VINFO_VEC_STMT and *VEC_STMT, another in the
   STMT_VINFO_RELATED_STMT of the previous copy; PREV_STMT_INFO then
   advances to the new stmt. */
3442 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3444 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3445 prev_stmt_info = vinfo_for_stmt (new_stmt);
3450 /* In case the vectorization factor (VF) is bigger than the number
3451 of elements that we can fit in a vectype (nunits), we have to
3452 generate more than one vector stmt - i.e - we need to "unroll"
3453 the vector stmt by a factor VF/nunits. */
3454 for (j = 0; j < ncopies; j++)
/* The operand def comes either from vect_get_vec_def_for_operand on OP0
   or from vect_get_vec_def_for_stmt_copy applied to the previous
   def. */
3457 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3459 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3461 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3463 /* Generate first half of the widened result: */
3465 = vect_gen_widened_results_half (code1, vectype_out, decl1,
3466 vec_oprnd0, vec_oprnd1,
3467 unary_op, vec_dest, bsi, stmt);
3469 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3471 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3472 prev_stmt_info = vinfo_for_stmt (new_stmt);
3474 /* Generate second half of the widened result: */
3476 = vect_gen_widened_results_half (code2, vectype_out, decl2,
3477 vec_oprnd0, vec_oprnd1,
3478 unary_op, vec_dest, bsi, stmt);
/* The second half is linked after the first through
   STMT_VINFO_RELATED_STMT. */
3479 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3480 prev_stmt_info = vinfo_for_stmt (new_stmt);
3485 /* In case the vectorization factor (VF) is bigger than the number
3486 of elements that we can fit in a vectype (nunits), we have to
3487 generate more than one vector stmt - i.e - we need to "unroll"
3488 the vector stmt by a factor VF/nunits. */
3489 for (j = 0; j < ncopies; j++)
/* Two vector defs are obtained per copy, the second derived from the
   first with vect_get_vec_def_for_stmt_copy; both become operands of
   the CODE1 expression built below. */
3494 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3495 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3499 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3500 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3503 /* Arguments are ready. Create the new vector stmt. */
3504 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3505 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3506 new_temp = make_ssa_name (vec_dest, new_stmt);
3507 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3508 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3511 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3513 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3515 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Hand the stmt recorded in STMT_VINFO_VEC_STMT back through
   VEC_STMT. */
3518 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3525 /* Function vectorizable_assignment.
3527 Check if STMT performs an assignment (copy) that can be vectorized.
3528 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3529 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3530 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3533 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3539 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3540 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3541 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3544 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
/* NCOPIES is initialised to the vectorization factor divided by the
   element count of VECTYPE. */
3545 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3546 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3548 VEC(tree,heap) *vec_oprnds = NULL;
3551 gcc_assert (ncopies >= 1);
3553 return false; /* FORNOW */
3555 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3558 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3561 /* FORNOW: not yet supported. */
3562 if (STMT_VINFO_LIVE_P (stmt_info))
3564 if (vect_print_dump_info (REPORT_DETAILS))
3565 fprintf (vect_dump, "value used after loop.");
3569 /* Is vectorizable assignment? */
3570 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3573 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3574 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* OP, the right-hand side of STMT, is checked with vect_is_simple_use,
   which is given &dt[0] for this operand. */
3577 op = GIMPLE_STMT_OPERAND (stmt, 1);
3578 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
3580 if (vect_print_dump_info (REPORT_DETAILS))
3581 fprintf (vect_dump, "use not simple.");
/* Analysis only (no VEC_STMT supplied): record that this stmt is
   handled as an assignment and account for it with
   vect_model_simple_cost. */
3585 if (!vec_stmt) /* transformation not required. */
3587 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3588 if (vect_print_dump_info (REPORT_DETAILS))
3589 fprintf (vect_dump, "=== vectorizable_assignment ===");
3590 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3595 if (vect_print_dump_info (REPORT_DETAILS))
3596 fprintf (vect_dump, "transform assignment.");
/* Create the vector destination variable from SCALAR_DEST and
   VECTYPE. */
3599 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* VEC_OPRNDS is passed to vect_get_vec_defs for OP and iterated
   below. */
3602 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3604 /* Arguments are ready. Create the new vector stmt. */
/* One vector copy stmt is emitted per element of VEC_OPRNDS. Each is
   stored in *VEC_STMT and in STMT_VINFO_VEC_STMT, so both hold the
   last stmt generated when the loop ends. */
3605 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
3607 *vec_stmt = build_gimple_modify_stmt (vec_dest, vop);
3608 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3609 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
3610 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3611 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
3614 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
/* Release the operand vector. */
3617 VEC_free (tree, heap, vec_oprnds);
3622 /* Function vect_min_worthwhile_factor.
3624 For a loop where we could vectorize the operation indicated by CODE,
3625 return the minimum vectorization factor that makes it worthwhile
3626 to use generic vectors. */
/* vectorizable_operation compares LOOP_VINFO_VECT_FACTOR against the
   value returned here, both when the target has no insn for the
   operation and when the mode of the vector type is not a vector
   mode. */
3628 vect_min_worthwhile_factor (enum tree_code code)
3649 /* Function vectorizable_induction
3651 Check if PHI performs an induction computation that can be vectorized.
3652 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3653 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3654 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3657 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3660 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3661 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3662 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* NCOPIES is the vectorization factor divided by the element count of
   VECTYPE. */
3663 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3664 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3667 gcc_assert (ncopies >= 1);
3669 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3672 /* FORNOW: SLP not supported. */
3673 if (STMT_SLP_TYPE (stmt_info))
/* PHI is expected to carry the induction def type at this point. */
3676 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3678 if (STMT_VINFO_LIVE_P (stmt_info))
3680 /* FORNOW: not yet supported. */
3681 if (vect_print_dump_info (REPORT_DETAILS))
3682 fprintf (vect_dump, "value used after loop.");
3686 if (TREE_CODE (phi) != PHI_NODE)
/* Analysis only (no VEC_STMT supplied): record that this stmt is
   handled as an induction and account for it with
   vect_model_induction_cost. */
3689 if (!vec_stmt) /* transformation not required. */
3691 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3692 if (vect_print_dump_info (REPORT_DETAILS))
3693 fprintf (vect_dump, "=== vectorizable_induction ===");
3694 vect_model_induction_cost (stmt_info, ncopies);
3700 if (vect_print_dump_info (REPORT_DETAILS))
3701 fprintf (vect_dump, "transform induction phi.");
/* *VEC_STMT becomes the stmt that defines the SSA name returned by
   get_initial_def_for_induction for PHI. */
3703 vec_def = get_initial_def_for_induction (phi);
3704 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3709 /* Function vectorizable_operation.
3711 Check if STMT performs a binary or unary operation that can be vectorized.
3712 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3713 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3714 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3717 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3723 tree op0, op1 = NULL;
3724 tree vec_oprnd1 = NULL_TREE;
3725 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3726 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3727 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3728 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3729 enum tree_code code;
3730 enum machine_mode vec_mode;
3735 enum machine_mode optab_op2_mode;
3737 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3738 tree new_stmt = NULL_TREE;
3739 stmt_vec_info prev_stmt_info;
/* NCOPIES is initialised to the vectorization factor divided by the
   element count of VECTYPE. */
3740 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3743 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3745 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3748 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3749 this, so we can safely override NCOPIES with 1 here. */
3752 gcc_assert (ncopies >= 1);
3753 /* FORNOW. This restriction should be relaxed. */
3754 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3756 if (vect_print_dump_info (REPORT_DETAILS))
3757 fprintf (vect_dump, "multiple types in nested loop.");
3761 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3764 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3767 /* FORNOW: not yet supported. */
3768 if (STMT_VINFO_LIVE_P (stmt_info))
3770 if (vect_print_dump_info (REPORT_DETAILS))
3771 fprintf (vect_dump, "value used after loop.");
3775 /* Is STMT a vectorizable binary/unary operation? */
3776 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3779 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
/* The element count of the vector type built from the scalar
   destination's type is compared with NUNITS_IN. */
3782 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3783 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3784 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3785 if (nunits_out != nunits_in)
3788 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3789 code = TREE_CODE (operation);
3791 /* For pointer addition, we should use the normal plus for
3792 the vector addition. */
3793 if (code == POINTER_PLUS_EXPR)
/* Look up the optab for CODE and VECTYPE. */
3796 optab = optab_for_tree_code (code, vectype);
3798 /* Support only unary or binary operations. */
3799 op_type = TREE_OPERAND_LENGTH (operation);
3800 if (op_type != unary_op && op_type != binary_op)
3802 if (vect_print_dump_info (REPORT_DETAILS))
3803 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
/* Operand 0, and operand 1 for binary operations, is checked with
   vect_is_simple_use, which is given the matching slot of DT. */
3807 op0 = TREE_OPERAND (operation, 0);
3808 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3810 if (vect_print_dump_info (REPORT_DETAILS))
3811 fprintf (vect_dump, "use not simple.");
3815 if (op_type == binary_op)
3817 op1 = TREE_OPERAND (operation, 1);
3818 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3820 if (vect_print_dump_info (REPORT_DETAILS))
3821 fprintf (vect_dump, "use not simple.");
3826 /* Supportable by target? */
3829 if (vect_print_dump_info (REPORT_DETAILS))
3830 fprintf (vect_dump, "no optab.");
/* Query the insn code that OPTAB provides for the machine mode of
   VECTYPE. */
3833 vec_mode = TYPE_MODE (vectype);
3834 icode = (int) optab_handler (optab, vec_mode)->insn_code;
3835 if (icode == CODE_FOR_nothing)
3837 if (vect_print_dump_info (REPORT_DETAILS))
3838 fprintf (vect_dump, "op not supported by target.");
/* No target insn: compare the size of VEC_MODE with UNITS_PER_WORD and
   the vectorization factor with the minimum worthwhile factor for
   CODE. */
3839 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3840 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3841 < vect_min_worthwhile_factor (code))
3843 if (vect_print_dump_info (REPORT_DETAILS))
3844 fprintf (vect_dump, "proceeding using word mode.");
3847 /* Worthwhile without SIMD support? */
3848 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3849 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3850 < vect_min_worthwhile_factor (code))
3852 if (vect_print_dump_info (REPORT_DETAILS))
3853 fprintf (vect_dump, "not worthwhile without SIMD support.");
/* Additional checks that apply to left and right shifts only. */
3857 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3859 /* FORNOW: not yet supported. */
3860 if (!VECTOR_MODE_P (vec_mode))
3863 /* Invariant argument is needed for a vector shift
3864 by a scalar shift operand. */
3865 optab_op2_mode = insn_data[icode].operand[2].mode;
3866 if (! (VECTOR_MODE_P (optab_op2_mode)
3867 || dt[1] == vect_constant_def
3868 || dt[1] == vect_invariant_def))
3870 if (vect_print_dump_info (REPORT_DETAILS))
3871 fprintf (vect_dump, "operand mode requires invariant argument.");
/* Analysis only (no VEC_STMT supplied): record that this stmt is
   handled as an operation and account for it with
   vect_model_simple_cost. */
3876 if (!vec_stmt) /* transformation not required. */
3878 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3879 if (vect_print_dump_info (REPORT_DETAILS))
3880 fprintf (vect_dump, "=== vectorizable_operation ===");
3881 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3887 if (vect_print_dump_info (REPORT_DETAILS))
3888 fprintf (vect_dump, "transform binary/unary operation.");
/* Create the vector destination variable from SCALAR_DEST and
   VECTYPE. */
3891 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Operand vectors with room for one element each; the second one is
   allocated for binary operations only. */
3895 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3896 if (op_type == binary_op)
3897 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3900 /* In case the vectorization factor (VF) is bigger than the number
3901 of elements that we can fit in a vectype (nunits), we have to generate
3902 more than one vector stmt - i.e - we need to "unroll" the
3903 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3904 from one copy of the vector stmt to the next, in the field
3905 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3906 stages to find the correct vector defs to be used when vectorizing
3907 stmts that use the defs of the current stmt. The example below illustrates
3908 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3909 4 vectorized stmts):
3911 before vectorization:
3912 RELATED_STMT VEC_STMT
3916 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3918 RELATED_STMT VEC_STMT
3919 VS1_0: vx0 = memref0 VS1_1 -
3920 VS1_1: vx1 = memref1 VS1_2 -
3921 VS1_2: vx2 = memref2 VS1_3 -
3922 VS1_3: vx3 = memref3 - -
3923 S1: x = load - VS1_0
3926 step2: vectorize stmt S2 (done here):
3927 To vectorize stmt S2 we first need to find the relevant vector
3928 def for the first operand 'x'. This is, as usual, obtained from
3929 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3930 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3931 relevant vector def 'vx0'. Having found 'vx0' we can generate
3932 the vector stmt VS2_0, and as usual, record it in the
3933 STMT_VINFO_VEC_STMT of stmt S2.
3934 When creating the second copy (VS2_1), we obtain the relevant vector
3935 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3936 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3937 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3938 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3939 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3940 chain of stmts and pointers:
3941 RELATED_STMT VEC_STMT
3942 VS1_0: vx0 = memref0 VS1_1 -
3943 VS1_1: vx1 = memref1 VS1_2 -
3944 VS1_2: vx2 = memref2 VS1_3 -
3945 VS1_3: vx3 = memref3 - -
3946 S1: x = load - VS1_0
3947 VS2_0: vz0 = vx0 + v1 VS2_1 -
3948 VS2_1: vz1 = vx1 + v1 VS2_2 -
3949 VS2_2: vz2 = vx2 + v1 VS2_3 -
3950 VS2_3: vz3 = vx3 + v1 - -
3951 S2: z = x + 1 - VS2_0 */
3953 prev_stmt_info = NULL;
3954 for (j = 0; j < ncopies; j++)
3959 if (op_type == binary_op
3960 && (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3963 /* Vector shl and shr insn patterns can be defined with scalar
3964 operand 2 (shift operand). In this case, use constant or loop
3965 invariant op1 directly, without extending it to vector mode
3967 optab_op2_mode = insn_data[icode].operand[2].mode;
3968 if (!VECTOR_MODE_P (optab_op2_mode))
3970 if (vect_print_dump_info (REPORT_DETAILS))
3971 fprintf (vect_dump, "operand 1 using scalar mode.");
3973 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
/* vect_get_vec_defs is asked for defs of both operands only when the
   operation is binary and VEC_OPRND1 has not been set; the other call
   passes NULL_TREE in place of OP1. */
3977 if (op_type == binary_op && !vec_oprnd1)
3978 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3981 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, &vec_oprnds1,
3985 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3987 /* Arguments are ready. Create the new vector stmt. */
/* One vector stmt is built per element of VEC_OPRNDS0; for binary
   operations it is paired with the element at the same index of
   VEC_OPRNDS1. */
3988 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3990 if (op_type == binary_op)
3992 vop1 = VEC_index (tree, vec_oprnds1, i);
3993 new_stmt = build_gimple_modify_stmt (vec_dest,
3994 build2 (code, vectype, vop0, vop1));
3997 new_stmt = build_gimple_modify_stmt (vec_dest,
3998 build1 (code, vectype, vop0));
4000 new_temp = make_ssa_name (vec_dest, new_stmt);
4001 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4002 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4004 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
/* Record the new stmt: one assignment stores it in both
   STMT_VINFO_VEC_STMT and *VEC_STMT, another in the
   STMT_VINFO_RELATED_STMT of the previous copy; PREV_STMT_INFO then
   advances to the new stmt. */
4008 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4010 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4011 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Release the operand vectors. */
4014 VEC_free (tree, heap, vec_oprnds0);
4016 VEC_free (tree, heap, vec_oprnds1);
4022 /* Function vectorizable_type_demotion
4024 Check if STMT performs a binary or unary operation that involves
4025 type demotion, and if it can be vectorized.
4026 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4027 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4028 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4031 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
4038 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4039 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4040 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4041 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4042 enum tree_code code, code1 = ERROR_MARK;
4045 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4047 stmt_vec_info prev_stmt_info;
4056 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4059 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4062 /* FORNOW: not yet supported. */
4063 if (STMT_VINFO_LIVE_P (stmt_info))
4065 if (vect_print_dump_info (REPORT_DETAILS))
4066 fprintf (vect_dump, "value used after loop.");
4070 /* Is STMT a vectorizable type-demotion operation? */
4071 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4074 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4077 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4078 code = TREE_CODE (operation);
/* Test whether CODE is NOP_EXPR or CONVERT_EXPR, the two codes handled
   here. */
4079 if (code != NOP_EXPR && code != CONVERT_EXPR)
/* VECTYPE_IN is derived from the type of operand 0 and VECTYPE_OUT from
   the type of the scalar destination; NUNITS_IN and NUNITS_OUT are
   their element counts. */
4082 op0 = TREE_OPERAND (operation, 0);
4083 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4084 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4086 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4087 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4088 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Test whether the input vector type holds half as many elements as the
   output vector type. */
4089 if (nunits_in != nunits_out / 2) /* FORNOW */
/* NCOPIES is the vectorization factor divided by the element count of
   the output vector type. */
4092 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4093 gcc_assert (ncopies >= 1);
4094 /* FORNOW. This restriction should be relaxed. */
4095 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4097 if (vect_print_dump_info (REPORT_DETAILS))
4098 fprintf (vect_dump, "multiple types in nested loop.");
/* Test that the types of the scalar destination and of operand 0 are
   either both integral or both scalar floating-point types. */
4102 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4103 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4104 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4105 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4106 && (code == NOP_EXPR || code == CONVERT_EXPR))))
4109 /* Check the operands of the operation. */
4110 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4112 if (vect_print_dump_info (REPORT_DETAILS))
4113 fprintf (vect_dump, "use not simple.");
4117 /* Supportable by target? */
4118 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
/* The vectype recorded for the stmt becomes the input vector type. */
4121 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
/* Analysis only (no VEC_STMT supplied): record that this stmt is
   handled as a type demotion and account for it with
   vect_model_simple_cost. */
4123 if (!vec_stmt) /* transformation not required. */
4125 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4126 if (vect_print_dump_info (REPORT_DETAILS))
4127 fprintf (vect_dump, "=== vectorizable_demotion ===");
4128 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
4133 if (vect_print_dump_info (REPORT_DETAILS))
4134 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
/* Create the vector destination variable from SCALAR_DEST and
   VECTYPE_OUT. */
4138 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4140 /* In case the vectorization factor (VF) is bigger than the number
4141 of elements that we can fit in a vectype (nunits), we have to generate
4142 more than one vector stmt - i.e - we need to "unroll" the
4143 vector stmt by a factor VF/nunits. */
4144 prev_stmt_info = NULL;
4145 for (j = 0; j < ncopies; j++)
/* Two vector defs are obtained per copy, the second derived from the
   first with vect_get_vec_def_for_stmt_copy; both become operands of
   the CODE1 expression built below. */
4150 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4151 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4155 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
4156 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4159 /* Arguments are ready. Create the new vector stmt. */
4160 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
4161 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
4162 new_temp = make_ssa_name (vec_dest, new_stmt);
4163 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4164 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* The new stmt is stored in STMT_VINFO_VEC_STMT or linked after the
   previous copy through STMT_VINFO_RELATED_STMT; PREV_STMT_INFO then
   advances to it. */
4167 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4169 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4171 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Hand the stmt recorded in STMT_VINFO_VEC_STMT back through
   VEC_STMT. */
4174 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4179 /* Function vectorizable_type_promotion
4181 Check if STMT performs a binary or unary operation that involves
4182 type promotion, and if it can be vectorized.
4183 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4184 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4185 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4188 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
4194 tree op0, op1 = NULL;
4195 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4196 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4197 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4198 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4199 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4200 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4203 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4205 stmt_vec_info prev_stmt_info;
4213 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4216 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4219 /* FORNOW: not yet supported. */
4220 if (STMT_VINFO_LIVE_P (stmt_info))
4222 if (vect_print_dump_info (REPORT_DETAILS))
4223 fprintf (vect_dump, "value used after loop.");
4227 /* Is STMT a vectorizable type-promotion operation? */
4228 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4231 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4234 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4235 code = TREE_CODE (operation);
4236 if (code != NOP_EXPR && code != CONVERT_EXPR
4237 && code != WIDEN_MULT_EXPR)
4240 op0 = TREE_OPERAND (operation, 0);
4241 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4242 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4244 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4245 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4246 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4247 if (nunits_out != nunits_in / 2) /* FORNOW */
4250 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4251 gcc_assert (ncopies >= 1);
4252 /* FORNOW. This restriction should be relaxed. */
4253 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4255 if (vect_print_dump_info (REPORT_DETAILS))
4256 fprintf (vect_dump, "multiple types in nested loop.");
4260 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4261 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4262 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4263 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4264 && (code == CONVERT_EXPR || code == NOP_EXPR))))
4267 /* Check the operands of the operation. */
4268 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4270 if (vect_print_dump_info (REPORT_DETAILS))
4271 fprintf (vect_dump, "use not simple.");
4275 op_type = TREE_CODE_LENGTH (code);
4276 if (op_type == binary_op)
4278 op1 = TREE_OPERAND (operation, 1);
4279 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
4281 if (vect_print_dump_info (REPORT_DETAILS))
4282 fprintf (vect_dump, "use not simple.");
4287 /* Supportable by target? */
4288 if (!supportable_widening_operation (code, stmt, vectype_in,
4289 &decl1, &decl2, &code1, &code2))
4292 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4294 if (!vec_stmt) /* transformation not required. */
4296 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4297 if (vect_print_dump_info (REPORT_DETAILS))
4298 fprintf (vect_dump, "=== vectorizable_promotion ===");
4299 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
4305 if (vect_print_dump_info (REPORT_DETAILS))
4306 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
4310 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4312 /* In case the vectorization factor (VF) is bigger than the number
4313 of elements that we can fit in a vectype (nunits), we have to generate
4314 more than one vector stmt - i.e - we need to "unroll" the
4315 vector stmt by a factor VF/nunits. */
4317 prev_stmt_info = NULL;
4318 for (j = 0; j < ncopies; j++)
4323 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4324 if (op_type == binary_op)
4325 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
4329 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4330 if (op_type == binary_op)
4331 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
4334 /* Arguments are ready. Create the new vector stmt. We are creating
4335 two vector defs because the widened result does not fit in one vector.
4336 The vectorized stmt can be expressed as a call to a target builtin,
4337 or using a tree-code. */
4338 /* Generate first half of the widened result: */
4339 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
4340 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4342 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4344 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4345 prev_stmt_info = vinfo_for_stmt (new_stmt);
4347 /* Generate second half of the widened result: */
4348 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
4349 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4350 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4351 prev_stmt_info = vinfo_for_stmt (new_stmt);
4355 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4360 /* Function vect_strided_store_supported.
4362 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
4363 and FALSE otherwise.
VECTYPE is the vector type to check; its machine mode (TYPE_MODE) is the
mode the optab handlers of VEC_INTERLEAVE_HIGH_EXPR and
VEC_INTERLEAVE_LOW_EXPR are queried with.  When REPORT_DETAILS dumping is
enabled, the reason for a failure is written to VECT_DUMP. */
4366 vect_strided_store_supported (tree vectype)
4368 optab interleave_high_optab, interleave_low_optab;
4371 mode = (int) TYPE_MODE (vectype);
4373 /* Check that the operation is supported. */
4374 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
4376 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
4378 if (!interleave_high_optab || !interleave_low_optab)
4380 if (vect_print_dump_info (REPORT_DETAILS))
4381 fprintf (vect_dump, "no optab for interleave.");
/* Both optabs exist; now check that each one has an insn for MODE. */
4385 if (optab_handler (interleave_high_optab, mode)->insn_code
4387 || optab_handler (interleave_low_optab, mode)->insn_code
4388 == CODE_FOR_nothing)
4390 if (vect_print_dump_info (REPORT_DETAILS))
4391 fprintf (vect_dump, "interleave op not supported by target.");
4399 /* Function vect_permute_store_chain.
4401 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4402 a power of 2, generate interleave_high/low stmts to reorder the data
4403 correctly for the stores. Return the final references for stores in
RESULT_CHAIN.
4406 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4407 The input is 4 vectors each containing 8 elements. We assign a number to each
4408 element, the input sequence is:
4410 1st vec: 0 1 2 3 4 5 6 7
4411 2nd vec: 8 9 10 11 12 13 14 15
4412 3rd vec: 16 17 18 19 20 21 22 23
4413 4th vec: 24 25 26 27 28 29 30 31
4415 The output sequence should be:
4417 1st vec: 0 8 16 24 1 9 17 25
4418 2nd vec: 2 10 18 26 3 11 19 27
4419 3rd vec: 4 12 20 28 5 13 21 29
4420 4th vec: 6 14 22 30 7 15 23 31
4422 i.e., we interleave the contents of the four vectors in their order.
4424 We use interleave_high/low instructions to create such output. The input of
4425 each interleave_high/low operation is two vectors:
1st vec: 0 1 2 3
2nd vec: 4 5 6 7
4428 the even elements of the result vector are obtained left-to-right from the
4429 high/low elements of the first vector. The odd elements of the result are
4430 obtained left-to-right from the high/low elements of the second vector.
4431 The output of interleave_high will be: 0 4 1 5
4432 and of interleave_low: 2 6 3 7
4435 The permutation is done in log LENGTH stages. In each stage interleave_high
4436 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4437 where the first argument is taken from the first half of DR_CHAIN and the
4438 second argument from its second half.
In our example,
4441 I1: interleave_high (1st vec, 3rd vec)
4442 I2: interleave_low (1st vec, 3rd vec)
4443 I3: interleave_high (2nd vec, 4th vec)
4444 I4: interleave_low (2nd vec, 4th vec)
4446 The output for the first stage is:
4448 I1: 0 16 1 17 2 18 3 19
4449 I2: 4 20 5 21 6 22 7 23
4450 I3: 8 24 9 25 10 26 11 27
4451 I4: 12 28 13 29 14 30 15 31
4453 The output of the second stage, i.e. the final result is:
4455 I1: 0 8 16 24 1 9 17 25
4456 I2: 2 10 18 26 3 11 19 27
4457 I3: 4 12 20 28 5 13 21 29
4458 I4: 6 14 22 30 7 15 23 31. */
4461 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4462 unsigned int length,
4464 block_stmt_iterator *bsi,
4465 VEC(tree,heap) **result_chain)
4467 tree perm_dest, perm_stmt, vect1, vect2, high, low;
4468 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4469 tree scalar_dest, tmp;
4472 VEC(tree,heap) *first, *second;
4474 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
/* NOTE(review): FIRST and SECOND are allocated here but are not referenced
   by any other line visible in this function; confirm whether they are
   still needed, and if so where they are released.  */
4475 first = VEC_alloc (tree, heap, length/2);
4476 second = VEC_alloc (tree, heap, length/2);
4478 /* Check that the operation is supported. */
4479 if (!vect_strided_store_supported (vectype))
/* Start RESULT_CHAIN as a copy of DR_CHAIN; its entries are overwritten
   with the interleaved vectors below via VEC_replace.  */
4482 *result_chain = VEC_copy (tree, heap, dr_chain);
4484 for (i = 0; i < exact_log2 (length); i++)
4486 for (j = 0; j < length/2; j++)
4488 vect1 = VEC_index (tree, dr_chain, j);
4489 vect2 = VEC_index (tree, dr_chain, j+length/2);
4491 /* Create interleaving stmt:
4492 in the case of big endian:
4493 high = interleave_high (vect1, vect2)
4494 and in the case of little endian:
4495 high = interleave_low (vect1, vect2). */
4496 perm_dest = create_tmp_var (vectype, "vect_inter_high");
4497 DECL_GIMPLE_REG_P (perm_dest) = 1;
4498 add_referenced_var (perm_dest);
4499 if (BYTES_BIG_ENDIAN)
4500 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4502 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4503 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4504 high = make_ssa_name (perm_dest, perm_stmt);
4505 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
4506 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4507 VEC_replace (tree, *result_chain, 2*j, high);
4509 /* Create interleaving stmt:
4510 in the case of big endian:
4511 low = interleave_low (vect1, vect2)
4512 and in the case of little endian:
4513 low = interleave_high (vect1, vect2). */
4514 perm_dest = create_tmp_var (vectype, "vect_inter_low");
4515 DECL_GIMPLE_REG_P (perm_dest) = 1;
4516 add_referenced_var (perm_dest);
4517 if (BYTES_BIG_ENDIAN)
4518 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4520 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4521 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4522 low = make_ssa_name (perm_dest, perm_stmt);
4523 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
4524 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4525 VEC_replace (tree, *result_chain, 2*j+1, low);
/* The vectors produced so far become the input DR_CHAIN for what follows.
   NOTE(review): each VEC_copy allocates a new vector and no matching
   VEC_free is visible in these lines; confirm that the copies are
   released.  */
4527 dr_chain = VEC_copy (tree, heap, *result_chain);
4533 /* Function vectorizable_store.
4535 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
can be vectorized.
4537 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4538 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4539 Return FALSE if not a vectorizable STMT, TRUE otherwise.
When VEC_STMT is NULL no transformation is required: STMT_VINFO_TYPE is
set to store_vec_info_type and, unless STMT is a pure SLP stmt, the cost
of the store is modeled.  For a strided (interleaved) group, the vector
stores for the whole group are generated only when the last store of the
group is reached. */
4542 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
4548 tree vec_oprnd = NULL_TREE;
4549 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4550 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4551 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4552 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4553 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4554 enum machine_mode vec_mode;
4556 enum dr_alignment_support alignment_support_scheme;
4558 enum vect_def_type dt;
4559 stmt_vec_info prev_stmt_info = NULL;
4560 tree dataref_ptr = NULL_TREE;
4561 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4562 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4564 tree next_stmt, first_stmt;
4565 bool strided_store = false;
4566 unsigned int group_size, i;
4567 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
4569 VEC(tree,heap) *vec_oprnds = NULL;
4570 bool slp = (slp_node != NULL);
4571 stmt_vec_info first_stmt_vinfo;
4572 unsigned int vec_num;
4574 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
4575 this, so we can safely override NCOPIES with 1 here. */
4579 gcc_assert (ncopies >= 1);
4581 /* FORNOW. This restriction should be relaxed. */
4582 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4584 if (vect_print_dump_info (REPORT_DETAILS))
4585 fprintf (vect_dump, "multiple types in nested loop.");
4589 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4592 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4595 if (STMT_VINFO_LIVE_P (stmt_info))
4597 if (vect_print_dump_info (REPORT_DETAILS))
4598 fprintf (vect_dump, "value used after loop.");
4602 /* Is vectorizable store? */
4604 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4607 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4608 if (TREE_CODE (scalar_dest) != ARRAY_REF
4609 && TREE_CODE (scalar_dest) != INDIRECT_REF
4610 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
4613 op = GIMPLE_STMT_OPERAND (stmt, 1);
4614 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4616 if (vect_print_dump_info (REPORT_DETAILS))
4617 fprintf (vect_dump, "use not simple.");
4621 vec_mode = TYPE_MODE (vectype);
4622 /* FORNOW. In some cases can vectorize even if data-type not supported
4623 (e.g. - array initialization with 0). */
4624 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
4627 if (!STMT_VINFO_DATA_REF (stmt_info))
4630 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4632 strided_store = true;
4633 if (!vect_strided_store_supported (vectype)
4634 && !PURE_SLP_STMT (stmt_info) && !slp)
4638 if (!vec_stmt) /* transformation not required. */
4640 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4641 if (!PURE_SLP_STMT (stmt_info))
4642 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
4650 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4651 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4652 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4654 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4657 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
4659 /* We vectorize all the stmts of the interleaving group when we
4660 reach the last stmt in the group. */
4661 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4662 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
4665 *vec_stmt = NULL_TREE;
4670 strided_store = false;
4672 /* VEC_NUM is the number of vect stmts to be created for this group. */
4673 if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
4674 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4676 vec_num = group_size;
4682 group_size = vec_num = 1;
4683 first_stmt_vinfo = stmt_info;
4686 if (vect_print_dump_info (REPORT_DETAILS))
4687 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
4689 dr_chain = VEC_alloc (tree, heap, group_size);
4690 oprnds = VEC_alloc (tree, heap, group_size);
4692 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4693 gcc_assert (alignment_support_scheme);
4694 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
4696 /* In case the vectorization factor (VF) is bigger than the number
4697 of elements that we can fit in a vectype (nunits), we have to generate
4698 more than one vector stmt - i.e - we need to "unroll" the
4699 vector stmt by a factor VF/nunits. For more details see documentation in
4700 vect_get_vec_def_for_stmt_copy. */
4702 /* In case of interleaving (non-unit strided access):
4709 We create vectorized stores starting from base address (the access of the
4710 first stmt in the chain (S2 in the above example), when the last store stmt
4711 of the chain (S4) is reached:
4714 VS2: &base + vec_size*1 = vx0
4715 VS3: &base + vec_size*2 = vx1
4716 VS4: &base + vec_size*3 = vx3
4718 Then permutation statements are generated:
4720 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
4721 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
4724 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4725 (the order of the data-refs in the output of vect_permute_store_chain
4726 corresponds to the order of scalar stmts in the interleaving chain - see
4727 the documentation of vect_permute_store_chain()).
4729 In case of both multiple types and interleaving, above vector stores and
4730 permutation stmts are created for every copy. The result vector stmts are
4731 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4732 STMT_VINFO_RELATED_STMT for the next copies.
4735 prev_stmt_info = NULL;
4736 for (j = 0; j < ncopies; j++)
4745 /* Get vectorized arguments for SLP_NODE. */
4746 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
4748 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4752 /* For interleaved stores we collect vectorized defs for all the
4753 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4754 used as an input to vect_permute_store_chain(), and OPRNDS as
4755 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4757 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4758 OPRNDS are of size 1. */
4759 next_stmt = first_stmt;
4760 for (i = 0; i < group_size; i++)
4762 /* Since gaps are not supported for interleaved stores,
4763 GROUP_SIZE is the exact number of stmts in the chain.
4764 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4765 there is no interleaving, GROUP_SIZE is 1, and only one
4766 iteration of the loop will be executed. */
4767 gcc_assert (next_stmt);
4768 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4770 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4772 VEC_quick_push(tree, dr_chain, vec_oprnd);
4773 VEC_quick_push(tree, oprnds, vec_oprnd);
4774 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4777 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
4778 &dummy, &ptr_incr, false,
4779 TREE_TYPE (vec_oprnd), &inv_p);
4780 gcc_assert (!inv_p);
4784 /* FORNOW SLP doesn't work for multiple types. */
4787 /* For interleaved stores we created vectorized defs for all the
4788 defs stored in OPRNDS in the previous iteration (previous copy).
4789 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4790 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
next copy.
4792 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4793 OPRNDS are of size 1. */
4794 for (i = 0; i < group_size; i++)
4796 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
4797 VEC_index (tree, oprnds, i));
4798 VEC_replace(tree, dr_chain, i, vec_oprnd);
4799 VEC_replace(tree, oprnds, i, vec_oprnd);
4802 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
4807 result_chain = VEC_alloc (tree, heap, group_size);
4809 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4814 next_stmt = first_stmt;
4815 for (i = 0; i < vec_num; i++)
4818 /* Bump the vector pointer. */
4819 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
4823 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4824 else if (strided_store)
4825 /* For strided stores vectorized defs are interleaved in
4826 vect_permute_store_chain(). */
4827 vec_oprnd = VEC_index (tree, result_chain, i);
4829 data_ref = build_fold_indirect_ref (dataref_ptr);
4830 /* Arguments are ready. Create the new vector stmt. */
4831 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4832 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4833 mark_symbols_for_renaming (new_stmt);
4836 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4838 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4840 prev_stmt_info = vinfo_for_stmt (new_stmt);
4841 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4851 /* Function vect_setup_realignment
4853 This function is called when vectorizing an unaligned load using
4854 the dr_explicit_realign[_optimized] scheme.
4855 This function generates the following code at the loop prolog:
4858 x msq_init = *(floor(p)); # prolog load
4859 realignment_token = call target_builtin;
4861 x msq = phi (msq_init, ---)
4863 The stmts marked with x are generated only for the case of
4864 dr_explicit_realign_optimized.
4866 The code above sets up a new (vector) pointer, pointing to the first
4867 location accessed by STMT, and a "floor-aligned" load using that pointer.
4868 It also generates code to compute the "realignment-token" (if the relevant
4869 target hook was defined), and creates a phi-node at the loop-header bb
4870 whose arguments are the result of the prolog-load (created by this
4871 function) and the result of a load that takes place in the loop (to be
4872 created by the caller to this function).
4874 For the case of dr_explicit_realign_optimized:
4875 The caller to this function uses the phi-result (msq) to create the
4876 realignment code inside the loop, and sets up the missing phi argument,
4879 msq = phi (msq_init, lsq)
4880 lsq = *(floor(p')); # load in loop
4881 result = realign_load (msq, lsq, realignment_token);
4883 For the case of dr_explicit_realign:
4885 msq = *(floor(p)); # load in loop
4887 lsq = *(floor(p')); # load in loop
4888 result = realign_load (msq, lsq, realignment_token);
4891 STMT - (scalar) load stmt to be vectorized. This load accesses
4892 a memory location that may be unaligned.
4893 BSI - place where new code is to be inserted.
4894 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
(dr_explicit_realign or dr_explicit_realign_optimized) is used; any other
value trips an assertion.
INIT_ADDR - if not NULL_TREE, the initial address already computed by the
caller inside the loop; only valid with dr_explicit_realign.
4898 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4899 target hook, if defined.
AT_LOOP - set to the loop chosen for the initial vector load: LOOP itself,
or LOOP->inner when the access is nested in LOOP and its step in LOOP is
not zero.
4900 Return value - the result of the loop-header phi node. */
4903 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4904 tree *realignment_token,
4905 enum dr_alignment_support alignment_support_scheme,
4907 struct loop **at_loop)
4909 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4910 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4911 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4912 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4914 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4921 tree msq_init = NULL_TREE;
4924 tree msq = NULL_TREE;
4925 tree stmts = NULL_TREE;
4927 bool compute_in_loop = false;
4928 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4929 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
4930 struct loop *loop_for_initial_load;
4932 gcc_assert (alignment_support_scheme == dr_explicit_realign
4933 || alignment_support_scheme == dr_explicit_realign_optimized);
4935 /* We need to generate three things:
4936 1. the misalignment computation
4937 2. the extra vector load (for the optimized realignment scheme).
4938 3. the phi node for the two vectors from which the realignment is
4939 done (for the optimized realignment scheme).
4942 /* 1. Determine where to generate the misalignment computation.
4944 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4945 calculation will be generated by this function, outside the loop (in the
4946 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4947 caller, inside the loop.
4949 Background: If the misalignment remains fixed throughout the iterations of
4950 the loop, then both realignment schemes are applicable, and also the
4951 misalignment computation can be done outside LOOP. This is because we are
4952 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4953 are a multiple of VS (the Vector Size), and therefore the misalignment in
4954 different vectorized LOOP iterations is always the same.
4955 The problem arises only if the memory access is in an inner-loop nested
4956 inside LOOP, which is now being vectorized using outer-loop vectorization.
4957 This is the only case when the misalignment of the memory access may not
4958 remain fixed throughout the iterations of the inner-loop (as explained in
4959 detail in vect_supportable_dr_alignment). In this case, not only is the
4960 optimized realignment scheme not applicable, but also the misalignment
4961 computation (and generation of the realignment token that is passed to
4962 REALIGN_LOAD) have to be done inside the loop.
4964 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4965 or not, which in turn determines if the misalignment is computed inside
4966 the inner-loop, or outside LOOP. */
4968 if (init_addr != NULL_TREE)
4970 compute_in_loop = true;
4971 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4975 /* 2. Determine where to generate the extra vector load.
4977 For the optimized realignment scheme, instead of generating two vector
4978 loads in each iteration, we generate a single extra vector load in the
4979 preheader of the loop, and in each iteration reuse the result of the
4980 vector load from the previous iteration. In case the memory access is in
4981 an inner-loop nested inside LOOP, which is now being vectorized using
4982 outer-loop vectorization, we need to determine whether this initial vector
4983 load should be generated at the preheader of the inner-loop, or can be
4984 generated at the preheader of LOOP. If the memory access has no evolution
4985 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4986 to be generated inside LOOP (in the preheader of the inner-loop). */
4988 if (nested_in_vect_loop)
4990 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4991 bool invariant_in_outerloop =
4992 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4993 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4996 loop_for_initial_load = loop;
4998 *at_loop = loop_for_initial_load;
5000 /* 3. For the case of the optimized realignment, create the first vector
5001 load at the loop preheader. */
5003 if (alignment_support_scheme == dr_explicit_realign_optimized)
5005 /* Create msq_init = *(floor(p1)) in the loop preheader */
5007 gcc_assert (!compute_in_loop);
5008 pe = loop_preheader_edge (loop_for_initial_load);
5009 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5010 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
5011 &init_addr, &inc, true, NULL_TREE, &inv_p);
5012 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5013 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5014 new_temp = make_ssa_name (vec_dest, new_stmt);
5015 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5016 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5017 gcc_assert (!new_bb);
5018 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
5021 /* 4. Create realignment token using a target builtin, if available.
5022 It is done either inside the containing loop, or before LOOP (as
5023 determined above). */
5025 if (targetm.vectorize.builtin_mask_for_load)
5029 /* Compute INIT_ADDR - the initial address accessed by this memref. */
5030 if (compute_in_loop)
5031 gcc_assert (init_addr); /* already computed by the caller. */
5034 /* Generate the INIT_ADDR computation outside LOOP. */
5035 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5037 pe = loop_preheader_edge (loop);
5038 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
5039 gcc_assert (!new_bb);
5042 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5043 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
5044 vec_dest = vect_create_destination_var (scalar_dest,
5045 TREE_TYPE (new_stmt));
5046 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5047 new_temp = make_ssa_name (vec_dest, new_stmt);
5048 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5050 if (compute_in_loop)
5051 bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
5054 /* Generate the misalignment computation outside LOOP. */
5055 pe = loop_preheader_edge (loop);
5056 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5057 gcc_assert (!new_bb);
5060 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
5062 /* The result of the CALL_EXPR to this builtin is determined from
5063 the value of the parameter and no global variables are touched
5064 which makes the builtin a "const" function. Requiring the
5065 builtin to have the "const" attribute makes it unnecessary
5066 to call mark_call_clobbered. */
5067 gcc_assert (TREE_READONLY (builtin_decl));
5070 if (alignment_support_scheme == dr_explicit_realign)
5073 gcc_assert (!compute_in_loop);
5074 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5077 /* 5. Create msq = phi <msq_init, lsq> in loop */
5079 pe = loop_preheader_edge (containing_loop);
5080 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5081 msq = make_ssa_name (vec_dest, NULL_TREE);
5082 phi_stmt = create_phi_node (msq, containing_loop->header);
5083 SSA_NAME_DEF_STMT (msq) = phi_stmt;
5084 add_phi_arg (phi_stmt, msq_init, pe);
5090 /* Function vect_strided_load_supported.
5092 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5093 and FALSE otherwise.
VECTYPE is the vector type to check: the optabs for VEC_EXTRACT_EVEN_EXPR
and VEC_EXTRACT_ODD_EXPR are looked up for it, and its machine mode
(TYPE_MODE) is the mode their handlers are queried with.  When
REPORT_DETAILS dumping is enabled, the reason for a failure is written to
VECT_DUMP. */
5096 vect_strided_load_supported (tree vectype)
5098 optab perm_even_optab, perm_odd_optab;
5101 mode = (int) TYPE_MODE (vectype);
/* Extract-even: an optab must exist and have an insn for MODE. */
5103 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
5104 if (!perm_even_optab)
5106 if (vect_print_dump_info (REPORT_DETAILS))
5107 fprintf (vect_dump, "no optab for perm_even.");
5111 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
5113 if (vect_print_dump_info (REPORT_DETAILS))
5114 fprintf (vect_dump, "perm_even op not supported by target.");
/* Extract-odd: same two checks. */
5118 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
5119 if (!perm_odd_optab)
5121 if (vect_print_dump_info (REPORT_DETAILS))
5122 fprintf (vect_dump, "no optab for perm_odd.");
5126 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
5128 if (vect_print_dump_info (REPORT_DETAILS))
5129 fprintf (vect_dump, "perm_odd op not supported by target.");
5136 /* Function vect_permute_load_chain.
5138 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5139 a power of 2, generate extract_even/odd stmts to reorder the input data
5140 correctly. Return the final references for loads in RESULT_CHAIN.
5142 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5143 The input is 4 vectors each containing 8 elements. We assign a number to each
5144 element, the input sequence is:
5146 1st vec: 0 1 2 3 4 5 6 7
5147 2nd vec: 8 9 10 11 12 13 14 15
5148 3rd vec: 16 17 18 19 20 21 22 23
5149 4th vec: 24 25 26 27 28 29 30 31
5151 The output sequence should be:
5153 1st vec: 0 4 8 12 16 20 24 28
5154 2nd vec: 1 5 9 13 17 21 25 29
5155 3rd vec: 2 6 10 14 18 22 26 30
5156 4th vec: 3 7 11 15 19 23 27 31
5158 i.e., the first output vector should contain the first elements of each
5159 interleaving group, etc.
5161 We use extract_even/odd instructions to create such output. The input of each
5162 extract_even/odd operation is two vectors
1st vec: 0 1 2 3
2nd vec: 4 5 6 7
5166 and the output is the vector of extracted even/odd elements. The output of
5167 extract_even will be: 0 2 4 6
5168 and of extract_odd: 1 3 5 7
5171 The permutation is done in log LENGTH stages. In each stage extract_even and
5172 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5173 order. In our example,
5175 E1: extract_even (1st vec, 2nd vec)
5176 E2: extract_odd (1st vec, 2nd vec)
5177 E3: extract_even (3rd vec, 4th vec)
5178 E4: extract_odd (3rd vec, 4th vec)
5180 The output for the first stage will be:
5182 E1: 0 2 4 6 8 10 12 14
5183 E2: 1 3 5 7 9 11 13 15
5184 E3: 16 18 20 22 24 26 28 30
5185 E4: 17 19 21 23 25 27 29 31
5187 In order to proceed and create the correct sequence for the next stage (or
5188 for the correct output, if the second stage is the last one, as in our
5189 example), we first put the output of extract_even operation and then the
5190 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5191 The input for the second stage is:
5193 1st vec (E1): 0 2 4 6 8 10 12 14
5194 2nd vec (E3): 16 18 20 22 24 26 28 30
5195 3rd vec (E2): 1 3 5 7 9 11 13 15
5196 4th vec (E4): 17 19 21 23 25 27 29 31
5198 The output of the second stage:
5200 E1: 0 4 8 12 16 20 24 28
5201 E2: 2 6 10 14 18 22 26 30
5202 E3: 1 5 9 13 17 21 25 29
5203 E4: 3 7 11 15 19 23 27 31
5205 And RESULT_CHAIN after reordering:
5207 1st vec (E1): 0 4 8 12 16 20 24 28
5208 2nd vec (E3): 1 5 9 13 17 21 25 29
5209 3rd vec (E2): 2 6 10 14 18 22 26 30
5210 4th vec (E4): 3 7 11 15 19 23 27 31. */
5213 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
5214 unsigned int length,
5216 block_stmt_iterator *bsi,
5217 VEC(tree,heap) **result_chain)
5219 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
5220 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5225 /* Check that the operation is supported. */
5226 if (!vect_strided_load_supported (vectype))
/* Start RESULT_CHAIN as a copy of DR_CHAIN; its entries are overwritten
   with the extracted vectors below via VEC_replace.  */
5229 *result_chain = VEC_copy (tree, heap, dr_chain);
5230 for (i = 0; i < exact_log2 (length); i++)
5232 for (j = 0; j < length; j +=2)
5234 first_vect = VEC_index (tree, dr_chain, j);
5235 second_vect = VEC_index (tree, dr_chain, j+1);
5237 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5238 perm_dest = create_tmp_var (vectype, "vect_perm_even");
5239 DECL_GIMPLE_REG_P (perm_dest) = 1;
5240 add_referenced_var (perm_dest);
5242 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
5243 first_vect, second_vect);
5244 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5246 data_ref = make_ssa_name (perm_dest, perm_stmt);
5247 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5248 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5249 mark_symbols_for_renaming (perm_stmt);
/* Even results fill the first half of RESULT_CHAIN. */
5251 VEC_replace (tree, *result_chain, j/2, data_ref);
5253 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5254 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
5255 DECL_GIMPLE_REG_P (perm_dest) = 1;
5256 add_referenced_var (perm_dest);
5258 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
5259 first_vect, second_vect);
5260 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5261 data_ref = make_ssa_name (perm_dest, perm_stmt);
5262 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5263 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5264 mark_symbols_for_renaming (perm_stmt);
/* Odd results fill the second half of RESULT_CHAIN. */
5266 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
/* The vectors produced so far become the input DR_CHAIN for what follows.
   NOTE(review): each VEC_copy allocates a new vector and no matching
   VEC_free is visible in these lines; confirm that the copies are
   released.  */
5268 dr_chain = VEC_copy (tree, heap, *result_chain);
5274 /* Function vect_transform_strided_load.
5276 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5277 to perform their permutation and ascribe the result vectorized statements to
5278 the scalar statements.
SIZE is handed to vect_permute_load_chain as the chain length and is also
the initial capacity of RESULT_CHAIN.
5282 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
5283 block_stmt_iterator *bsi)
5285 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5286 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5287 tree next_stmt, new_stmt;
5288 VEC(tree,heap) *result_chain = NULL;
5289 unsigned int i, gap_count;
5292 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5293 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5294 vectors, that are ready for vector computation. */
5295 result_chain = VEC_alloc (tree, heap, size);
5297 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
5300 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5301 Since we scan the chain starting from its first node, their order
5302 corresponds to the order of data-refs in RESULT_CHAIN. */
5303 next_stmt = first_stmt;
5305 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
5310 /* Skip the gaps. Loads created for the gaps will be removed by dead
5311 code elimination pass later.
5312 DR_GROUP_GAP is the number of steps in elements from the previous
5313 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5314 correspond to the gaps.
5316 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
5324 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5325 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5326 copies, and we put the new vector statement in the first available
RELATED_STMT.
5328 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5329 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5332 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5333 tree rel_stmt = STMT_VINFO_RELATED_STMT (
5334 vinfo_for_stmt (prev_stmt));
5337 prev_stmt = rel_stmt;
5338 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5340 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
5342 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5344 /* If NEXT_STMT accesses the same DR as the previous statement,
5345 put the same TMP_DATA_REF as its vectorized statement; otherwise
5346 get the next data-ref from RESULT_CHAIN. */
5347 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5355 /* vectorizable_load.
5357 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5359 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5360 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
When VEC_STMT is NULL only the analysis is performed: STMT_VINFO_TYPE is
set to load_vec_info_type and vect_model_load_cost is called.
For SLP (SLP_NODE is non-NULL) the generated vector loads are pushed onto
SLP_TREE_VEC_STMTS (SLP_NODE).
5361 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5364 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
5368 tree vec_dest = NULL;
5369 tree data_ref = NULL;
5371 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5372 stmt_vec_info prev_stmt_info;
5373 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5374 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5375 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
5376 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5377 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
5378 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5381 tree new_stmt = NULL_TREE;
5383 enum dr_alignment_support alignment_support_scheme;
5384 tree dataref_ptr = NULL_TREE;
5386 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5387 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5388 int i, j, group_size;
5389 tree msq = NULL_TREE, lsq;
5390 tree offset = NULL_TREE;
5391 tree realignment_token = NULL_TREE;
5392 tree phi = NULL_TREE;
5393 VEC(tree,heap) *dr_chain = NULL;
5394 bool strided_load = false;
5398 bool compute_in_loop = false;
5399 struct loop *at_loop;
5401 bool slp = (slp_node != NULL);
5403 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
5404 this, so we can safely override NCOPIES with 1 here. */
5408 gcc_assert (ncopies >= 1);
5410 /* FORNOW. This restriction should be relaxed. */
5411 if (nested_in_vect_loop && ncopies > 1)
5413 if (vect_print_dump_info (REPORT_DETAILS))
5414 fprintf (vect_dump, "multiple types in nested loop.");
5418 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5421 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5424 /* FORNOW: not yet supported. */
5425 if (STMT_VINFO_LIVE_P (stmt_info))
5427 if (vect_print_dump_info (REPORT_DETAILS))
5428 fprintf (vect_dump, "value used after loop.");
5432 /* Is vectorizable load? */
5433 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5436 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
5437 if (TREE_CODE (scalar_dest) != SSA_NAME)
5440 op = GIMPLE_STMT_OPERAND (stmt, 1);
5441 if (TREE_CODE (op) != ARRAY_REF
5442 && TREE_CODE (op) != INDIRECT_REF
5443 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
5446 if (!STMT_VINFO_DATA_REF (stmt_info))
5449 scalar_type = TREE_TYPE (DR_REF (dr));
5450 mode = (int) TYPE_MODE (vectype);
5452 /* FORNOW. In some cases can vectorize even if data-type not supported
5453 (e.g. - data copies). */
5454 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
5456 if (vect_print_dump_info (REPORT_DETAILS))
5457 fprintf (vect_dump, "Aligned load, but unsupported type.");
5461 /* Check if the load is a part of an interleaving chain. */
5462 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5464 strided_load = true;
5466 gcc_assert (! nested_in_vect_loop);
5468 /* Check if interleaving is supported. */
5469 if (!vect_strided_load_supported (vectype)
5470 && !PURE_SLP_STMT (stmt_info) && !slp)
5474 if (!vec_stmt) /* transformation not required. */
5476 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5477 vect_model_load_cost (stmt_info, ncopies, NULL);
5481 if (vect_print_dump_info (REPORT_DETAILS))
5482 fprintf (vect_dump, "transform load.");
5488 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5489 /* Check if the chain of loads is already vectorized. */
5490 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
5492 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5495 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5496 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
5497 dr_chain = VEC_alloc (tree, heap, group_size);
5499 /* VEC_NUM is the number of vect stmts to be created for this group. */
5502 strided_load = false;
5503 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5506 vec_num = group_size;
5512 group_size = vec_num = 1;
5515 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
5516 gcc_assert (alignment_support_scheme);
5518 /* In case the vectorization factor (VF) is bigger than the number
5519 of elements that we can fit in a vectype (nunits), we have to generate
5520 more than one vector stmt - i.e - we need to "unroll" the
5521 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5522 from one copy of the vector stmt to the next, in the field
5523 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5524 stages to find the correct vector defs to be used when vectorizing
5525 stmts that use the defs of the current stmt. The example below illustrates
5526 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
5527 4 vectorized stmts):
5529 before vectorization:
5530 RELATED_STMT VEC_STMT
5534 step 1: vectorize stmt S1:
5535 We first create the vector stmt VS1_0, and, as usual, record a
5536 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
5537 Next, we create the vector stmt VS1_1, and record a pointer to
5538 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
5539 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
5541 RELATED_STMT VEC_STMT
5542 VS1_0: vx0 = memref0 VS1_1 -
5543 VS1_1: vx1 = memref1 VS1_2 -
5544 VS1_2: vx2 = memref2 VS1_3 -
5545 VS1_3: vx3 = memref3 - -
5546 S1: x = load - VS1_0
5549 See the documentation in vect_get_vec_def_for_stmt_copy for how the
5550 information we recorded in RELATED_STMT field is used to vectorize
5553 /* In case of interleaving (non-unit strided access):
5560 Vectorized loads are created in the order of memory accesses
5561 starting from the access of the first stmt of the chain:
5564 VS2: vx1 = &base + vec_size*1
5565 VS3: vx3 = &base + vec_size*2
5566 VS4: vx4 = &base + vec_size*3
5568 Then permutation statements are generated:
5570 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
5571 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
5574 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5575 (the order of the data-refs in the output of vect_permute_load_chain
5576 corresponds to the order of scalar stmts in the interleaving chain - see
5577 the documentation of vect_permute_load_chain()).
5578 The generation of permutation stmts and recording them in
5579 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
5581 In case of both multiple types and interleaving, the vector loads and
5582 permutation stmts above are created for every copy. The result vector stmts
5583 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5584 STMT_VINFO_RELATED_STMT for the next copies. */
5586 /* If the data reference is aligned (dr_aligned) or potentially unaligned
5587 on a target that supports unaligned accesses (dr_unaligned_supported)
5588 we generate the following code:
5592 p = p + indx * vectype_size;
5597 Otherwise, the data reference is potentially unaligned on a target that
5598 does not support unaligned accesses (dr_explicit_realign_optimized) -
5599 then generate the following code, in which the data in each iteration is
5600 obtained by two vector loads, one from the previous iteration, and one
5601 from the current iteration:
5603 msq_init = *(floor(p1))
5604 p2 = initial_addr + VS - 1;
5605 realignment_token = call target_builtin;
5608 p2 = p2 + indx * vectype_size
5610 vec_dest = realign_load (msq, lsq, realignment_token)
5615 /* If the misalignment remains the same throughout the execution of the
5616 loop, we can create the init_addr and permutation mask at the loop
5617 preheader. Otherwise, it needs to be created inside the loop.
5618 This can only occur when vectorizing memory accesses in the inner-loop
5619 nested within an outer-loop that is being vectorized. */
5621 if (nested_in_vect_loop_p (loop, stmt)
5622 && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
5624 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5625 compute_in_loop = true;
5628 if ((alignment_support_scheme == dr_explicit_realign_optimized
5629 || alignment_support_scheme == dr_explicit_realign)
5630 && !compute_in_loop)
5632 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token,
5633 alignment_support_scheme, NULL_TREE,
5635 if (alignment_support_scheme == dr_explicit_realign_optimized)
5637 phi = SSA_NAME_DEF_STMT (msq);
5638 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5644 prev_stmt_info = NULL;
5645 for (j = 0; j < ncopies; j++)
5647 /* 1. Create the vector pointer update chain. */
5649 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
5651 &dummy, &ptr_incr, false,
5655 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
5657 for (i = 0; i < vec_num; i++)
5660 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
5663 /* 2. Create the vector-load in the loop. */
5664 switch (alignment_support_scheme)
5667 gcc_assert (aligned_access_p (first_dr));
5668 data_ref = build_fold_indirect_ref (dataref_ptr);
5670 case dr_unaligned_supported:
5672 int mis = DR_MISALIGNMENT (first_dr);
5673 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
/* Scale the misalignment by BITS_PER_UNIT (presumably bytes to bits). */
5675 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
5677 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
5680 case dr_explicit_realign:
5683 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5685 if (compute_in_loop)
5686 msq = vect_setup_realignment (first_stmt, bsi,
5688 dr_explicit_realign,
5691 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5692 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5693 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5694 new_temp = make_ssa_name (vec_dest, new_stmt);
5695 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5696 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5697 copy_virtual_operands (new_stmt, stmt);
5698 mark_symbols_for_renaming (new_stmt);
5701 bump = size_binop (MULT_EXPR, vs_minus_1,
5702 TYPE_SIZE_UNIT (scalar_type));
5703 ptr = bump_vector_ptr (dataref_ptr, NULL_TREE, bsi, stmt, bump);
5704 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5707 case dr_explicit_realign_optimized:
5708 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5713 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5714 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5715 new_temp = make_ssa_name (vec_dest, new_stmt);
5716 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5717 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5718 mark_symbols_for_renaming (new_stmt);
5720 /* 3. Handle explicit realignment if necessary/supported. Create in
5721 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
5722 if (alignment_support_scheme == dr_explicit_realign_optimized
5723 || alignment_support_scheme == dr_explicit_realign)
5725 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
5726 if (!realignment_token)
5727 realignment_token = dataref_ptr;
5728 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5729 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
5731 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5732 new_temp = make_ssa_name (vec_dest, new_stmt);
5733 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5734 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5736 if (alignment_support_scheme == dr_explicit_realign_optimized)
5738 if (i == vec_num - 1 && j == ncopies - 1)
5739 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
5744 /* 4. Handle invariant-load. */
5747 gcc_assert (!strided_load);
5748 gcc_assert (nested_in_vect_loop_p (loop, stmt));
5753 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
5755 /* CHECKME: bitpos depends on endianness? */
5756 bitpos = bitsize_zero_node;
5757 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
5759 BIT_FIELD_REF_UNSIGNED (vec_inv) =
5760 TYPE_UNSIGNED (scalar_type);
5762 vect_create_destination_var (scalar_dest, NULL_TREE);
5763 new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
5764 new_temp = make_ssa_name (vec_dest, new_stmt);
5765 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5766 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5768 for (k = nunits - 1; k >= 0; --k)
5769 t = tree_cons (NULL_TREE, new_temp, t);
5770 /* FIXME: use build_constructor directly. */
5771 vec_inv = build_constructor_from_list (vectype, t);
5772 new_temp = vect_init_vector (stmt, vec_inv, vectype, bsi);
5773 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5776 gcc_unreachable (); /* FORNOW. */
5779 /* Collect vector loads and later create their permutation in
5780 vect_transform_strided_load (). */
5782 VEC_quick_push (tree, dr_chain, new_temp);
5784 /* Store vector loads in the corresponding SLP_NODE. */
5786 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5789 /* FORNOW: SLP with multiple types is unsupported. */
5795 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
5797 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5798 dr_chain = VEC_alloc (tree, heap, group_size);
5803 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5805 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5806 prev_stmt_info = vinfo_for_stmt (new_stmt);
5814 /* Function vectorizable_live_operation.
5816 STMT computes a value that is used outside the loop. Check if
5817 it can be supported.
STMT must be marked STMT_VINFO_LIVE_P (asserted). Every operand of the
right-hand side has to pass vect_is_simple_use and be classified as
vect_invariant_def or vect_constant_def. BSI and VEC_STMT are unused. */
5820 vectorizable_live_operation (tree stmt,
5821 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
5822 tree *vec_stmt ATTRIBUTE_UNUSED)
5825 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5826 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5827 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5832 enum vect_def_type dt;
5834 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5836 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5839 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5842 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
5845 /* FORNOW. CHECKME. */
5846 if (nested_in_vect_loop_p (loop, stmt))
5849 operation = GIMPLE_STMT_OPERAND (stmt, 1);
5850 op_type = TREE_OPERAND_LENGTH (operation);
5852 /* FORNOW: support only if all uses are invariant. This means
5853 that the scalar operations can remain in place, unvectorized.
5854 The original last scalar value that they compute will be used. */
5856 for (i = 0; i < op_type; i++)
5858 op = TREE_OPERAND (operation, i);
5859 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
5861 if (vect_print_dump_info (REPORT_DETAILS))
5862 fprintf (vect_dump, "use not simple.");
5866 if (dt != vect_invariant_def && dt != vect_constant_def)
5870 /* No transformation is required for the cases we currently support. */
5875 /* Function vect_is_simple_cond.
5878 LOOP_VINFO - the loop_vec_info of the loop that is being vectorized.
5879 COND - Condition that is checked for simple use.
5881 Returns whether a COND can be vectorized. Checks whether
5882 condition operands are supportable using vect_is_simple_use.
COND must be a comparison (COMPARISON_CLASS_P). Each of its two operands
must be either an SSA_NAME accepted by vect_is_simple_use, or an
INTEGER_CST, REAL_CST or FIXED_CST. */
5885 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
5889 enum vect_def_type dt;
5891 if (!COMPARISON_CLASS_P (cond))
5894 lhs = TREE_OPERAND (cond, 0);
5895 rhs = TREE_OPERAND (cond, 1);
5897 if (TREE_CODE (lhs) == SSA_NAME)
5899 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5900 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
5903 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5904 && TREE_CODE (lhs) != FIXED_CST)
5907 if (TREE_CODE (rhs) == SSA_NAME)
5909 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5910 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
5913 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5914 && TREE_CODE (rhs) != FIXED_CST)
5920 /* vectorizable_condition.
5922 Check if STMT is conditional modify expression that can be vectorized.
5923 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5924 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
The right-hand side of STMT must be a COND_EXPR whose condition passes
vect_is_simple_cond and whose first condition operand has the element
type of the vector type. The then and else clauses must each be an
SSA_NAME accepted by vect_is_simple_use, or an INTEGER_CST, REAL_CST or
FIXED_CST. In the analysis-only case STMT_VINFO_TYPE is set to
condition_vec_info_type and the result of expand_vec_cond_expr_p is
returned.
5927 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5930 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
5932 tree scalar_dest = NULL_TREE;
5933 tree vec_dest = NULL_TREE;
5934 tree op = NULL_TREE;
5935 tree cond_expr, then_clause, else_clause;
5936 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5937 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5938 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
5939 tree vec_compare, vec_cond_expr;
5941 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5942 enum machine_mode vec_mode;
5944 enum vect_def_type dt;
5945 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5946 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5948 gcc_assert (ncopies >= 1);
5950 return false; /* FORNOW */
5952 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5955 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5958 /* FORNOW: SLP not supported. */
5959 if (STMT_SLP_TYPE (stmt_info))
5962 /* FORNOW: not yet supported. */
5963 if (STMT_VINFO_LIVE_P (stmt_info))
5965 if (vect_print_dump_info (REPORT_DETAILS))
5966 fprintf (vect_dump, "value used after loop.");
5970 /* Is vectorizable conditional operation? */
5971 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5974 op = GIMPLE_STMT_OPERAND (stmt, 1);
5976 if (TREE_CODE (op) != COND_EXPR)
5979 cond_expr = TREE_OPERAND (op, 0);
5980 then_clause = TREE_OPERAND (op, 1);
5981 else_clause = TREE_OPERAND (op, 2);
5983 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
5986 /* We do not handle two different vector types for the condition
5988 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
5991 if (TREE_CODE (then_clause) == SSA_NAME)
5993 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5994 if (!vect_is_simple_use (then_clause, loop_vinfo,
5995 &then_def_stmt, &def, &dt))
5998 else if (TREE_CODE (then_clause) != INTEGER_CST
5999 && TREE_CODE (then_clause) != REAL_CST
6000 && TREE_CODE (then_clause) != FIXED_CST)
6003 if (TREE_CODE (else_clause) == SSA_NAME)
6005 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6006 if (!vect_is_simple_use (else_clause, loop_vinfo,
6007 &else_def_stmt, &def, &dt))
6010 else if (TREE_CODE (else_clause) != INTEGER_CST
6011 && TREE_CODE (else_clause) != REAL_CST
6012 && TREE_CODE (else_clause) != FIXED_CST)
6016 vec_mode = TYPE_MODE (vectype);
6020 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6021 return expand_vec_cond_expr_p (op, vec_mode);
6027 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
6028 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6030 /* Handle cond expr. */
6032 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
6034 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
6035 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
6036 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
6038 /* Arguments are ready. create the new vector stmt. */
6039 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
6040 vec_cond_lhs, vec_cond_rhs);
6041 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6042 vec_compare, vec_then_clause, vec_else_clause);
6044 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
6045 new_temp = make_ssa_name (vec_dest, *vec_stmt);
6046 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
6047 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
6053 /* Function vect_transform_stmt.
6055 Create a vectorized stmt to replace STMT, and insert it at BSI.
The work is dispatched on STMT_VINFO_TYPE of STMT to the matching
vectorizable_* routine; several of these routines are only reached
when SLP_NODE is NULL (asserted). For a store that is a strided
access, *STRIDED_STORE is set to true. The vector stmt is recorded in
STMT_VINFO_VEC_STMT of STMT and, when STMT replaces a pattern, also in
the stmt_info of the original pattern stmt. */
6058 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store,
6061 bool is_store = false;
6062 tree vec_stmt = NULL_TREE;
6063 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6064 tree orig_stmt_in_pattern;
6067 switch (STMT_VINFO_TYPE (stmt_info))
6069 case type_demotion_vec_info_type:
6070 gcc_assert (!slp_node);
6071 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
6075 case type_promotion_vec_info_type:
6076 gcc_assert (!slp_node);
6077 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
6081 case type_conversion_vec_info_type:
6082 done = vectorizable_conversion (stmt, bsi, &vec_stmt, slp_node);
6086 case induc_vec_info_type:
6087 gcc_assert (!slp_node);
6088 done = vectorizable_induction (stmt, bsi, &vec_stmt);
6092 case op_vec_info_type:
6093 done = vectorizable_operation (stmt, bsi, &vec_stmt, slp_node);
6097 case assignment_vec_info_type:
6098 done = vectorizable_assignment (stmt, bsi, &vec_stmt, slp_node);
6102 case load_vec_info_type:
6103 done = vectorizable_load (stmt, bsi, &vec_stmt, slp_node);
6107 case store_vec_info_type:
6108 done = vectorizable_store (stmt, bsi, &vec_stmt, slp_node);
6110 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6112 /* In case of interleaving, the whole chain is vectorized when the
6113 last store in the chain is reached. Store stmts before the last
6114 one are skipped, and their vec_stmt_info shouldn't be freed
6116 *strided_store = true;
6117 if (STMT_VINFO_VEC_STMT (stmt_info))
6124 case condition_vec_info_type:
6125 gcc_assert (!slp_node);
6126 done = vectorizable_condition (stmt, bsi, &vec_stmt);
6130 case call_vec_info_type:
6131 gcc_assert (!slp_node);
6132 done = vectorizable_call (stmt, bsi, &vec_stmt);
6135 case reduc_vec_info_type:
6136 gcc_assert (!slp_node);
6137 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
6142 if (!STMT_VINFO_LIVE_P (stmt_info))
6144 if (vect_print_dump_info (REPORT_DETAILS))
6145 fprintf (vect_dump, "stmt not supported.");
6150 if (STMT_VINFO_LIVE_P (stmt_info)
6151 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6153 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
6159 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6160 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
6161 if (orig_stmt_in_pattern)
6163 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
6164 /* STMT was inserted by the vectorizer to replace a computation idiom.
6165 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
6166 computed this idiom. We need to record a pointer to VEC_STMT in
6167 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
6168 documentation of vect_pattern_recog. */
6169 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
6171 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
6172 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
6181 /* This function builds ni_name = number of iterations loop executes
6182 on the loop preheader.
The expression is an unshared copy of LOOP_VINFO_NITERS of LOOP_VINFO,
gimplified with force_gimple_operand into a temporary named "niters".
Statements produced by the gimplification are inserted on the loop
preheader edge; the insertion is asserted not to create a new basic
block. */
6185 vect_build_loop_niters (loop_vec_info loop_vinfo)
6187 tree ni_name, stmt, var;
6189 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6190 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6192 var = create_tmp_var (TREE_TYPE (ni), "niters");
6193 add_referenced_var (var);
/* Gimplify NI; statements needed to compute it are returned in STMT. */
6194 ni_name = force_gimple_operand (ni, &stmt, false, var);
6196 pe = loop_preheader_edge (loop);
6199 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6200 gcc_assert (!new_bb);
6207 /* This function generates the following statements:
6209 ni_name = number of iterations loop executes
6210 ratio = ni_name / vf
6211 ratio_mult_vf_name = ratio * vf
6213 and places them at the loop preheader edge.
The division and the multiplication are built as right and left shifts
by exact_log2 (vf), where vf is LOOP_VINFO_VECT_FACTOR of LOOP_VINFO
(presumably a power of two - confirm against the analysis phase).
The three values are returned through NI_NAME_PTR,
RATIO_MULT_VF_NAME_PTR and RATIO_NAME_PTR. */
6216 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6218 tree *ratio_mult_vf_name_ptr,
6219 tree *ratio_name_ptr)
6227 tree ratio_mult_vf_name;
6228 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6229 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
6230 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6233 pe = loop_preheader_edge (loop);
6235 /* Generate temporary variable that contains
6236 number of iterations loop executes. */
6238 ni_name = vect_build_loop_niters (loop_vinfo);
6239 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
6241 /* Create: ratio = ni >> log2(vf) */
6243 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
6244 if (!is_gimple_val (ratio_name))
6246 var = create_tmp_var (TREE_TYPE (ni), "bnd");
6247 add_referenced_var (var);
6249 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
6250 pe = loop_preheader_edge (loop);
6251 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6252 gcc_assert (!new_bb);
6255 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6257 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6258 ratio_name, log_vf);
6259 if (!is_gimple_val (ratio_mult_vf_name))
6261 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
6262 add_referenced_var (var);
6264 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
6266 pe = loop_preheader_edge (loop);
6267 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6268 gcc_assert (!new_bb);
6271 *ni_name_ptr = ni_name;
6272 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6273 *ratio_name_ptr = ratio_name;
6279 /* Function vect_update_ivs_after_vectorizer.
6281 "Advance" the induction variables of LOOP to the value they should take
6282 after the execution of LOOP. This is currently necessary because the
6283 vectorizer does not handle induction variables that are used after the
6284 loop. Such a situation occurs when the last iterations of LOOP are
6286 1. We introduced new uses after LOOP for IVs that were not originally used
6287 after LOOP: the IVs of LOOP are now used by an epilog loop.
6288 2. LOOP is going to be vectorized; this means that it will iterate N/VF
6289 times, whereas the loop IVs should be bumped N times.
6292 - LOOP - a loop that is going to be vectorized. The last few iterations
6293 of LOOP were peeled.
6294 - NITERS - the number of iterations that LOOP executes (before it is
6295 vectorized). i.e, the number of times the ivs should be bumped.
6296 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
6297 coming out from LOOP on which there are uses of the LOOP ivs
6298 (this is the path from LOOP->exit to epilog_loop->preheader).
6300 The new definitions of the ivs are placed in LOOP->exit.
6301 The phi args associated with the edge UPDATE_E in the bb
6302 UPDATE_E->dest are updated accordingly.
The phis of the LOOP header and of UPDATE_E->dest are walked in
lockstep. Virtual phis and reduction phis are skipped. For every
other phi the new value is built from the initial value and the step
of its scalar evolution together with NITERS, gimplified at the end
of the exit bb, and stored as the phi argument for UPDATE_E.
6304 Assumption 1: Like the rest of the vectorizer, this function assumes
6305 a single loop exit that has a single predecessor.
6307 Assumption 2: The phi nodes in the LOOP header and in update_bb are
6308 organized in the same order.
6310 Assumption 3: The access function of the ivs is simple enough (see
6311 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
6313 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
6314 coming out of LOOP on which the ivs of LOOP are used (this is the path
6315 that leads to the epilog loop; other paths skip the epilog loop). This
6316 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
6317 needs to have its phis updated.
6321 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
6324 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6325 basic_block exit_bb = single_exit (loop)->dest;
6327 basic_block update_bb = update_e->dest;
6329 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
6331 /* Make sure there exists a single-predecessor exit bb: */
6332 gcc_assert (single_pred_p (exit_bb));
6334 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
6336 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
6338 tree access_fn = NULL;
6339 tree evolution_part;
6342 tree var, ni, ni_name;
6343 block_stmt_iterator last_bsi;
6345 if (vect_print_dump_info (REPORT_DETAILS))
6347 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
6348 print_generic_expr (vect_dump, phi, TDF_SLIM);
6351 /* Skip virtual phi's. */
6352 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
6354 if (vect_print_dump_info (REPORT_DETAILS))
6355 fprintf (vect_dump, "virtual phi. skip.");
6359 /* Skip reduction phis. */
6360 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
6362 if (vect_print_dump_info (REPORT_DETAILS))
6363 fprintf (vect_dump, "reduc phi. skip.");
6367 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
6368 gcc_assert (access_fn);
6370 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
6371 gcc_assert (evolution_part != NULL_TREE);
6373 /* FORNOW: We do not support IVs whose evolution function is a polynomial
6374 of degree >= 2 or exponential. */
6375 gcc_assert (!tree_is_chrec (evolution_part));
6377 step_expr = evolution_part;
6378 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
/* Pointer IVs are advanced with POINTER_PLUS_EXPR and an offset converted
   to sizetype; other IVs use PLUS_EXPR in the type of INIT_EXPR. */
6381 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
6382 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
6384 fold_convert (sizetype,
6385 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
6386 niters, step_expr)));
6388 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
6389 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
6390 fold_convert (TREE_TYPE (init_expr),
6397 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
6398 add_referenced_var (var);
6400 last_bsi = bsi_last (exit_bb);
6401 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
6402 true, BSI_SAME_STMT);
6404 /* Fix phi expressions in the successor bb. */
6405 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
6410 /* Function vect_do_peeling_for_loop_bound
6412 Peel the last iterations of the loop represented by LOOP_VINFO.
6413 The peeled iterations form a new epilog loop. Given that the loop now
6414 iterates NITERS times, the new epilog loop iterates
6415 NITERS % VECTORIZATION_FACTOR times.
6417 The original loop will later be made to iterate
6418 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
/* Parameters:
   LOOP_VINFO - vectorizer info for the loop.  LOOP_VINFO_LOOP,
     LOOP_VINFO_COST_MODEL_MIN_ITERS, LOOP_VINFO_VECT_FACTOR,
     LOOP_PEELING_FOR_ALIGNMENT and LOOP_VINFO_NITERS_KNOWN_P are read from it.
   RATIO - handed unchanged to vect_generate_tmps_on_preheader, next to the
     addresses of NI_NAME and RATIO_MULT_VF_NAME.

   Outline: generate the iteration-count temporaries, choose the threshold
   TH, peel on LOOP's single exit edge, find the edge UPDATE_E into the new
   loop's preheader, and update the IVs along that edge.

   NOTE(review): UPDATE_E, LOOP_NUM and TH are used below, but their
   declarations are not among the lines visible in this listing.  */
6421 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
6423 tree ni_name, ratio_mult_vf_name;
6424 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6425 struct loop *new_loop;
6427 basic_block preheader;
6430 int min_scalar_loop_bound;
6431 int min_profitable_iters;
6433 if (vect_print_dump_info (REPORT_DETAILS))
6434 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
/* Paired with free_original_copy_tables at the end of this function.  */
6436 initialize_original_copy_tables ();
6438 /* Generate the following variables on the preheader of original loop:
6440 ni_name = number of iteration the original loop executes
6441 ratio = ni_name / vf
6442 ratio_mult_vf_name = ratio * vf */
6443 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
6444 &ratio_mult_vf_name, ratio);
/* Saved so the assertion after peeling can check that LOOP kept its
   number.  */
6446 loop_num = loop->num;
6448 /* Analyze cost to set threshold for vectorized loop. */
6449 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
6450 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
6451 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
/* TH starts as the user-parameter bound (PARAM_MIN_VECT_LOOP_BOUND * VF - 1).
   The cost-model bound replaces it when that bound is nonzero and either the
   user bound is zero or the cost-model bound is the larger of the two.  */
6453 /* Use the cost model only if it is more conservative than user specified
6456 th = (unsigned) min_scalar_loop_bound;
6457 if (min_profitable_iters
6458 && (!min_scalar_loop_bound
6459 || min_profitable_iters > min_scalar_loop_bound))
6460 th = (unsigned) min_profitable_iters;
6462 if (((LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
6463 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6464 && vect_print_dump_info (REPORT_DETAILS))
6465 fprintf (vect_dump, "vectorization may not be profitable.");
6467 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
6468 ratio_mult_vf_name, ni_name, false,
6470 gcc_assert (new_loop);
6471 gcc_assert (loop_num == loop->num);
6472 #ifdef ENABLE_CHECKING
6473 slpeel_verify_cfg_after_peeling (loop, new_loop);
6476 /* A guard that controls whether the new_loop is to be executed or skipped
6477 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
6478 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
6479 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
6480 is on the path where the LOOP IVs are used and need to be updated. */
/* Choose between predecessor edges 0 and 1 of PREHEADER: edge 0 is taken
   when its source block is the destination of LOOP's single exit.  */
6482 preheader = loop_preheader_edge (new_loop)->src;
6483 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
6484 update_e = EDGE_PRED (preheader, 0);
6486 update_e = EDGE_PRED (preheader, 1);
6488 /* Update IVs of original loop as if they were advanced
6489 by ratio_mult_vf_name steps. */
6490 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
6492 /* After peeling we have to reset scalar evolution analyzer. */
6495 free_original_copy_tables ();
6499 /* Function vect_gen_niters_for_prolog_loop
6501 Set the number of iterations for the loop represented by LOOP_VINFO
6502 to the minimum between LOOP_NITERS (the original iteration count of the loop)
6503 and the misalignment of DR - the data reference recorded in
6504 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
6505 this loop, the data reference DR will refer to an aligned location.
6507 The following computation is generated:
6509 If the misalignment of DR is known at compile time:
6510 addr_mis = int mis = DR_MISALIGNMENT (dr);
6511 Else, compute address misalignment in bytes:
6512 addr_mis = addr & (vectype_size - 1)
6514 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
6516 (elem_size = element type size; an element is the scalar element
6517 whose type is the inner type of the vectype)
For interleaved accesses:
6521 prolog_niters = min ( LOOP_NITERS ,
6522 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
6523 where group_size is the size of the interleaved group.
6525 The above formulas assume that VF == number of elements in the vector. This
6526 may not hold when there are multiple-types in the loop.
6527 In this case, for some data-references in the loop the VF does not represent
6528 the number of elements that fit in the vector. Therefore, instead of VF we
6529 use TYPE_VECTOR_SUBPARTS. */
/* Parameters:
   LOOP_VINFO - supplies the unaligned data reference
     (LOOP_VINFO_UNALIGNED_DR), the loop, and the peeling amount
     (LOOP_PEELING_FOR_ALIGNMENT).
   LOOP_NITERS - iteration count of the loop.  Its type is the type of the
     result, and it bounds the result through a MIN_EXPR unless it is an
     INTEGER_CST.

   The value is gimplified into a temporary named "prolog_loop_niters", and
   the statement produced by gimplification is inserted on the loop preheader
   edge.

   NOTE(review): several locals used below (VAR, STMT, PE, GROUP_SIZE and the
   tree-valued BYTE_MISALIGN / ELEM_MISALIGN of the second branch) have no
   declaration among the visible lines, and neither does the final return
   statement.  */
6532 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
6534 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
6535 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6537 tree iters, iters_name;
6540 tree dr_stmt = DR_STMT (dr);
6541 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
6542 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6543 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
6544 tree niters_type = TREE_TYPE (loop_niters);
6546 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
6547 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
6549 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6551 /* For interleaved access element size must be multiplied by the size of
6552 the interleaved group. */
6553 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
6554 DR_GROUP_FIRST_DR (stmt_info)));
6555 element_size *= group_size;
6558 pe = loop_preheader_edge (loop);
/* Positive peeling amount: the misalignment is a compile-time constant, so
   the iteration count is folded to a constant of NITERS_TYPE.  */
6560 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
6562 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
6563 int elem_misalign = byte_misalign / element_size;
6565 if (vect_print_dump_info (REPORT_DETAILS))
6566 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
/* NOTE(review): the function header writes the first operand as
   VF/group_size, while the code uses NELEMENTS.  Masking with
   NELEMENTS/GROUP_SIZE - 1 yields the same value provided that mask is one
   less than a power of two that divides NELEMENTS - confirm.  */
6567 iters = build_int_cst (niters_type,
6568 (nelements - elem_misalign)&(nelements/group_size-1));
/* Otherwise the misalignment is only known at run time: emit code that
   derives it from the address of the first vector access.  */
6572 tree new_stmts = NULL_TREE;
6573 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
6574 &new_stmts, NULL_TREE, loop);
6575 tree ptr_type = TREE_TYPE (start_addr);
6576 tree size = TYPE_SIZE (ptr_type);
/* TYPE is an integer type of the same bit size as the pointer type; the
   arithmetic below is carried out in it.  */
6577 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
6578 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
/* Shift count for the byte->element conversion below: log2 of
   VECTYPE_ALIGN / NELEMENTS.
   NOTE(review): this branch uses neither ELEMENT_SIZE nor GROUP_SIZE, unlike
   the constant branch above - confirm this is intended for strided
   accesses.  */
6579 tree elem_size_log =
6580 build_int_cst (type, exact_log2 (vectype_align/nelements));
6581 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
6582 tree nelements_tree = build_int_cst (type, nelements);
/* Emit the address computation on the preheader edge; the assertion requires
   that the insertion did not create a new basic block.  */
6586 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
6587 gcc_assert (!new_bb);
6589 /* Create: byte_misalign = addr & (vectype_size - 1) */
6591 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
6593 /* Create: elem_misalign = byte_misalign / element_size */
6595 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
6597 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
6598 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
6599 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
6600 iters = fold_convert (niters_type, iters);
6603 /* Create: prolog_loop_niters = min (iters, loop_niters) */
6604 /* If the loop bound is known at compile time we already verified that it is
6605 greater than vf; since the misalignment ('iters') is at most vf, there's
6606 no need to generate the MIN_EXPR in this case. */
6607 if (TREE_CODE (loop_niters) != INTEGER_CST)
6608 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
6610 if (vect_print_dump_info (REPORT_DETAILS))
6612 fprintf (vect_dump, "niters for prolog loop: ");
6613 print_generic_expr (vect_dump, iters, TDF_SLIM);
/* Gimplify ITERS into the temporary; STMT receives whatever statements the
   gimplification needs.  */
6616 var = create_tmp_var (niters_type, "prolog_loop_niters");
6617 add_referenced_var (var);
6618 iters_name = force_gimple_operand (iters, &stmt, false, var);
6620 /* Insert stmt on loop preheader edge. */
6623 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6624 gcc_assert (!new_bb);
6631 /* Function vect_update_init_of_dr
6633 NITERS iterations were peeled from LOOP. DR represents a data reference
6634 in LOOP. This function updates the information recorded in DR to
6635 account for the fact that the first NITERS iterations had already been
6636 executed. Specifically, it updates the OFFSET field of DR. */
/* DR - data reference whose DR_OFFSET is replaced.
   NITERS - number of peeled iterations; it is multiplied by DR_STEP (DR) to
     obtain the amount added to the offset.  */
6639 vect_update_init_of_dr (struct data_reference *dr, tree niters)
6641 tree offset = DR_OFFSET (dr);
/* NITERS is reused to hold NITERS * DR_STEP.  The product is built in the
   type of NITERS, the sum in the type of OFFSET.  */
6643 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
6644 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
6645 DR_OFFSET (dr) = offset;
6649 /* Function vect_update_inits_of_drs
6651 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
6652 This function updates the information recorded for the data references in
6653 the loop to account for the fact that the first NITERS iterations had
6654 already been executed. Specifically, it calls vect_update_init_of_dr on
6655 every data reference in the loop, which updates the OFFSET field of each. */
/* LOOP_VINFO - supplies the vector of data references
     (LOOP_VINFO_DATAREFS).
   NITERS - passed unchanged to vect_update_init_of_dr for each data
     reference in that vector.

   NOTE(review): the declaration of the index I is not among the visible
   lines.  */
6658 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
6661 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
6662 struct data_reference *dr;
6664 if (vect_print_dump_info (REPORT_DETAILS))
6665 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
/* VEC_iterate stores element I into DR and serves as the loop condition.  */
6667 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
6668 vect_update_init_of_dr (dr, niters);
6672 /* Function vect_do_peeling_for_alignment
6674 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
6675 'niters' is set to the misalignment of one of the data references in the
6676 loop, thereby forcing it to refer to an aligned location at the beginning
6677 of the execution of this loop. The data reference for which we are
6678 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
/* LOOP_VINFO - vectorizer info for the loop.  On return
     LOOP_VINFO_NITERS (LOOP_VINFO) has been reduced by the prolog iteration
     count, and every data reference has been advanced by that count through
     vect_update_inits_of_drs.

   NOTE(review): N_ITERS is used below without a visible declaration, and the
   assignment that receives the result of slpeel_tree_peel_loop_to_edge is
   not among the visible lines.  */
6681 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
6683 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6684 tree niters_of_prolog_loop, ni_name;
6686 struct loop *new_loop;
6688 if (vect_print_dump_info (REPORT_DETAILS))
6689 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
/* Paired with free_original_copy_tables at the end of this function.  */
6691 initialize_original_copy_tables ();
/* NI_NAME comes from vect_build_loop_niters and is passed on as the
   LOOP_NITERS argument of vect_gen_niters_for_prolog_loop.  */
6693 ni_name = vect_build_loop_niters (loop_vinfo);
6694 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
6696 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
6698 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
6699 niters_of_prolog_loop, ni_name, true, 0);
6700 gcc_assert (new_loop);
6701 #ifdef ENABLE_CHECKING
/* Argument order is (NEW_LOOP, LOOP) here, the reverse of the call made in
   vect_do_peeling_for_loop_bound.  */
6702 slpeel_verify_cfg_after_peeling (new_loop, loop);
6705 /* Update number of times loop executes. */
6706 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
6707 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
6708 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
6710 /* Update the init conditions of the access functions of all data refs. */
6711 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
6713 /* After peeling we have to reset scalar evolution analyzer. */
6716 free_original_copy_tables ();
6720 /* Function vect_create_cond_for_align_checks.
6722 Create a conditional expression that represents the alignment checks for
6723 all of data references (array element references) whose alignment must be
checked at runtime.
Input:
6727 LOOP_VINFO - two fields of the loop information are used.
6728 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
6729 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
Output:
6732 COND_EXPR_STMT_LIST - statements needed to construct the conditional
expression.
6734 The returned value is the conditional expression to be used in the if
6735 statement that controls which version of the loop gets executed at runtime.
6737 The algorithm makes two assumptions:
6738 1) The number of bytes "n" in a vector is a power of 2.
6739 2) An address "a" is aligned if a%n is zero and that this
6740 test can be done as a&(n-1) == 0. For example, for 16
6741 byte vectors the test is a&0xf == 0. */
/* Parameters:
   LOOP_VINFO - supplies the loop, the statements to check
     (LOOP_VINFO_MAY_MISALIGN_STMTS) and the mask (LOOP_VINFO_PTR_MASK).
     The mask must be nonzero and of the form 2^k - 1; this is asserted.
   COND_EXPR_STMT_LIST - statement list to which every statement built here
     is appended.

   Returns an EQ_EXPR of boolean_type_node that compares
   (OR of all addresses) & mask with zero.

   NOTE(review): several locals used below (I, REF_STMT, TMP, TMP_NAME,
   PSIZE, ADDR_BASE, MASK_CST, PTRSIZE_ZERO) have no declaration among the
   visible lines.  In particular the size of TMP_NAME is not visible, and the
   sprintf calls that fill it are unbounded - confirm the buffer holds
   "addr2int" / "orptrs" followed by the decimal index.  */
6744 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
6745 tree *cond_expr_stmt_list)
6747 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6748 VEC(tree,heap) *may_misalign_stmts
6749 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
6751 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
6755 tree int_ptrsize_type;
6757 tree or_tmp_name = NULL_TREE;
6758 tree and_tmp, and_tmp_name, and_stmt;
6761 /* Check that mask is one less than a power of 2, i.e., mask is
6762 all zeros followed by all ones. */
6763 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
6765 /* CHECKME: what is the best integer or unsigned type to use to hold a
6766 cast from a pointer value? */
6767 psize = TYPE_SIZE (ptr_type_node);
6769 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
6771 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
6772 of the first vector of the i'th data reference. */
6774 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
6776 tree new_stmt_list = NULL_TREE;
6778 tree addr_tmp, addr_tmp_name, addr_stmt;
6779 tree or_tmp, new_or_tmp_name, or_stmt;
6781 /* create: addr_tmp = (int)(address_of_first_vector) */
6782 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
6783 &new_stmt_list, NULL_TREE, loop);
/* Statements produced while computing the address are queued on
   COND_EXPR_STMT_LIST ahead of the conversion statement built next.  */
6785 if (new_stmt_list != NULL_TREE)
6786 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
6788 sprintf (tmp_name, "%s%d", "addr2int", i);
6789 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6790 add_referenced_var (addr_tmp);
6791 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
6792 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
6793 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
6794 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
6795 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
6797 /* The addresses are ORed together. */
6799 if (or_tmp_name != NULL_TREE)
6801 /* create: or_tmp = or_tmp | addr_tmp */
6802 sprintf (tmp_name, "%s%d", "orptrs", i);
6803 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6804 add_referenced_var (or_tmp);
6805 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
6806 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
6807 or_tmp_name, addr_tmp_name);
6808 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
6809 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
6810 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
6811 or_tmp_name = new_or_tmp_name;
/* First address seen (OR_TMP_NAME still NULL_TREE): it seeds the running
   OR.  */
6814 or_tmp_name = addr_tmp_name;
6818 mask_cst = build_int_cst (int_ptrsize_type, mask);
6820 /* create: and_tmp = or_tmp & mask */
6821 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
6822 add_referenced_var (and_tmp);
6823 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
6825 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
6826 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
6827 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
6828 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
6830 /* Make and_tmp the left operand of the conditional test against zero.
6831 if and_tmp has a nonzero bit then some address is unaligned. */
6832 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
6833 return build2 (EQ_EXPR, boolean_type_node,
6834 and_tmp_name, ptrsize_zero);
6837 /* Function vect_vfa_segment_size.
6839 Create an expression that computes the size of segment
6840 that will be accessed for a data reference. The function takes into
6841 account that realignment loads may access one more vector.
Input:
6844 DR: The data reference.
6845 VECT_FACTOR: vectorization factor.
6847 Return an expression whose value is the size of segment which will be
accessed for DR. */
/* DR - the data reference; DR_STEP (DR) is one factor of the length.
   VECT_FACTOR - the vectorization factor.

   The length is built in integer_type_node and converted to sizetype.  Two
   shapes are built:
   - when vect_supportable_dr_alignment (DR) is dr_explicit_realign_optimized,
     a PLUS_EXPR whose first operand is a MULT_EXPR over DR_STEP (DR);
   - otherwise the MULT_EXPR alone.

   NOTE(review): the remaining operands sit on lines not visible in this
   listing.  Presumably the second MULT_EXPR operand is VECT_FACTOR and the
   PLUS_EXPR addend is the vector-mode-size constant built first - confirm
   against the full source.  */
6851 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
6853 tree segment_length;
6855 if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
/* Constant holding the size in bytes of the mode of DR's vector type.  */
6858 build_int_cst (integer_type_node,
6859 GET_MODE_SIZE (TYPE_MODE (STMT_VINFO_VECTYPE
6860 (vinfo_for_stmt (DR_STMT (dr))))));
6863 fold_convert (sizetype,
6864 fold_build2 (PLUS_EXPR, integer_type_node,
6865 fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
6872 fold_convert (sizetype,
6873 fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
6877 return segment_length;
6880 /* Function vect_create_cond_for_alias_checks.
6882 Create a conditional expression that represents the run-time checks for
6883 overlapping of address ranges represented by a list of data references
6884 relations passed as input.
Input:
6887 COND_EXPR - input conditional expression. New conditions will be chained
6888 with logical and operation.
6889 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_DDRS contains the list of ddrs
to be checked.
Output:
6893 COND_EXPR - conditional expression.
6894 COND_EXPR_STMT_LIST - statements needed to construct the conditional
expression.
6896 The returned value is the conditional expression to be used in the if
6897 statement that controls which version of the loop gets executed at runtime.
*/
/* Parameters:
   LOOP_VINFO - supplies the loop, the list of data-dependence relations to
     test (LOOP_VINFO_MAY_ALIAS_DDRS) and the vectorization factor.
   COND_EXPR - dereferenced below as *COND_EXPR; its declaration is on a
     line not visible in this listing.  Each pair's test is either chained
     onto it with TRUTH_AND_EXPR or stored into it directly.
   COND_EXPR_STMT_LIST - passed to vect_create_addr_base_for_vector_ref for
     both references of every pair.

   NOTE(review): I, DDR, VECT_FACTOR, ADDR_BASE_A and ADDR_BASE_B are used
   below without a visible declaration, and the statement guarded by the
   VEC_empty test is not visible either.  */
6901 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
6903 tree * cond_expr_stmt_list)
6905 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6906 VEC (ddr_p, heap) * may_alias_ddrs =
6907 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
/* The vectorization factor as an integer_type_node constant - presumably
   the initializer of VECT_FACTOR, which is what vect_vfa_segment_size
   receives below.  */
6909 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
6913 tree part_cond_expr;
6915 /* Create expression
6916 ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
6917 || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
6921 ((store_ptr_n + store_segment_length_n) < load_ptr_n)
6922 || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
6924 if (VEC_empty (ddr_p, may_alias_ddrs))
6927 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
6929 tree stmt_a = DR_STMT (DDR_A (ddr));
6930 tree stmt_b = DR_STMT (DDR_B (ddr));
6933 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
6936 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
6939 tree segment_length_a = vect_vfa_segment_size (DDR_A (ddr), vect_factor);
6940 tree segment_length_b = vect_vfa_segment_size (DDR_B (ddr), vect_factor);
6942 if (vect_print_dump_info (REPORT_DR_DETAILS))
6945 "create runtime check for data references ");
6946 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
6947 fprintf (vect_dump, " and ");
6948 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
/* The pair passes when either segment ends before the other begins.  Each
   end is formed as base + segment length with POINTER_PLUS_EXPR.  */
6953 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
6954 fold_build2 (LT_EXPR, boolean_type_node,
6955 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
6959 fold_build2 (LT_EXPR, boolean_type_node,
6960 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
/* Chain this pair's test onto an existing *COND_EXPR, or start the chain
   with it.  The test that selects between the two assignments is not among
   the visible lines.  */
6966 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
6967 *cond_expr, part_cond_expr);
6969 *cond_expr = part_cond_expr;
6971 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
6972 fprintf (vect_dump, "created %u versioning for alias checks.\n",
6973 VEC_length (ddr_p, may_alias_ddrs));
6977 /* Remove a group of stores (for SLP or interleaving), free their
stmt_vec_info. */
/* FIRST_STMT - first statement of the group; the walk starts from it and
     follows DR_GROUP_NEXT_DR.

   NOTE(review): ANN and TMP are used below without a visible declaration,
   and the loop header and the step that advances NEXT are not among the
   visible lines.  */
6981 vect_remove_stores (tree first_stmt)
6984 tree next = first_stmt;
6986 stmt_vec_info next_stmt_info;
6987 block_stmt_iterator next_si;
6991 /* Free the attached stmt_vec_info and remove the stmt. */
6992 next_si = bsi_for_stmt (next);
/* NOTE(review): the statement is removed from its block before its
   stmt_vec_info and annotation are looked up.  vect_transform_loop performs
   the same steps with the removal last - confirm both orders are
   equivalent.  */
6993 bsi_remove (&next_si, true);
6994 next_stmt_info = vinfo_for_stmt (next);
6995 ann = stmt_ann (next);
/* Read the successor before freeing NEXT_STMT_INFO: the link is fetched
   from the info that is about to be released.  */
6996 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
6997 free (next_stmt_info);
6998 set_stmt_info (ann, NULL);
7004 /* Vectorize SLP instance tree in postorder. */
/* NODE - SLP tree node to vectorize.  SLP_TREE_LEFT (NODE) and
     SLP_TREE_RIGHT (NODE) are scheduled first.
   VEC_STMTS_SIZE - capacity allocated for SLP_TREE_VEC_STMTS (NODE) and the
     value stored in SLP_TREE_NUMBER_OF_VEC_STMTS (NODE).  It is passed
     unchanged to both recursive calls.

   NOTE(review): STMT is used below without a visible declaration; the test
   that ends the recursion and the return statements are not among the
   visible lines.  */
7007 vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
7010 bool strided_store, is_store;
7011 block_stmt_iterator si;
7012 stmt_vec_info stmt_info;
/* Children before the node itself: this is the postorder walk.  */
7017 vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
7018 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
/* Only the first scalar statement of NODE is handed to vect_transform_stmt,
   together with NODE itself.  */
7020 stmt = VEC_index(tree, SLP_TREE_SCALAR_STMTS (node), 0);
7021 stmt_info = vinfo_for_stmt (stmt);
7022 SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size);
7023 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
7025 if (vect_print_dump_info (REPORT_DETAILS))
7027 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
7028 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7031 si = bsi_for_stmt (stmt);
7032 is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
7035 if (DR_GROUP_FIRST_DR (stmt_info))
7036 /* If IS_STORE is TRUE, the vectorization of the
7037 interleaving chain was completed - free all the stores in
7039 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
7041 /* FORNOW: SLP originates only from strided stores. */
7047 /* FORNOW: SLP originates only from strided stores. */
/* Function vect_schedule_slp.

   Schedule every SLP instance recorded in LOOP_VINFO_SLP_INSTANCES
   (LOOP_VINFO) by calling vect_schedule_slp_instance on the instance's tree.

   LOOP_VINFO - supplies the SLP instances and the vectorization factor.
   NUNITS - divisor used when computing the number of vector statements per
     node: VF * GROUP_SIZE / NUNITS.

   IS_STORE holds the result of the most recent vect_schedule_slp_instance
   call.  NOTE(review): the return statement is not among the visible lines;
   presumably IS_STORE is the value returned - confirm.  */
7053 vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
7055 VEC (slp_instance, heap) *slp_instances =
7056 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
7057 slp_instance instance;
7058 unsigned int vec_stmts_size;
7059 unsigned int group_size, i;
7060 unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7061 bool is_store = false;
7063 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
7065 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
7066 /* For each SLP instance calculate number of vector stmts to be created
7067 for the scalar stmts in each node of the SLP tree. Number of vector
7068 elements in one vector iteration is the number of scalar elements in
7069 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
7071 vec_stmts_size = vectorization_factor * group_size / nunits;
7073 /* Schedule the tree of INSTANCE. */
7074 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
/* The message is printed when either the vectorized-loops or the
   unvectorized-loops report level is enabled.  */
7077 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
7078 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
7079 fprintf (vect_dump, "vectorizing stmts using SLP.");
7086 /* Function vect_transform_loop.
7088 The analysis phase has determined that the loop is vectorizable.
7089 Vectorize the loop - create vectorized stmts to replace the scalar
7090 stmts in the loop, and update the loop exit condition. */
/* LOOP_VINFO - vectorizer info for the loop to transform.

   Steps, in the order they appear below:
   1. If there are may-misalign statements or may-alias relations, build the
      runtime condition, version the loop with loop_version, give the
      vectorized loop its own exit block with loop-closed phis, and insert
      the condition statements at the end of CONDITION_BB.
   2. Peel for alignment when LOOP_PEELING_FOR_ALIGNMENT is nonzero.
   3. Peel for the loop bound when the iteration count is unknown or is not
      a multiple of the vectorization factor; otherwise RATIO is the constant
      quotient.
   4. Split the preheader edge.
   5. For every basic block, transform the relevant/live phis and statements,
      scheduling all SLP instances once and removing vectorized stores.
   6. Set the loop's iteration count from RATIO and update SSA form.

   NOTE(review): many locals used below (I, PHI, RATIO, NLOOP, E, NEW_EXIT_E,
   IS_STORE, STRIDED_STORE, ANN, TMP) have no declaration among the visible
   lines, and several control-flow lines (else, continue, iterator advance)
   are likewise absent from this listing.  */
7093 vect_transform_loop (loop_vec_info loop_vinfo)
7095 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7096 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7097 int nbbs = loop->num_nodes;
7098 block_stmt_iterator si, next_si;
7101 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7103 bool slp_scheduled = false;
7104 unsigned int nunits;
7106 if (vect_print_dump_info (REPORT_DETAILS))
7107 fprintf (vect_dump, "=== vec_transform_loop ===");
7109 /* If the loop has data references that may or may not be aligned or/and
7110 has data reference relations whose independence was not proven then
7111 two versions of the loop need to be generated, one which is vectorized
7112 and one which isn't. A test is then generated to control which of the
7113 loops is executed. The test checks for the alignment of all of the
7114 data references that may or may not be aligned. An additional
7115 sequence of runtime tests is generated for each pairs of DDRs whose
7116 independence was not proven. The vectorized version of loop is
7117 executed only if both alias and alignment tests are passed. */
7119 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7120 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7123 tree cond_expr = NULL_TREE;
7124 tree cond_expr_stmt_list = NULL_TREE;
7125 basic_block condition_bb;
7126 block_stmt_iterator cond_exp_bsi;
7127 basic_block merge_bb;
7128 basic_block new_exit_bb;
7130 tree orig_phi, new_phi, arg;
/* Four fifths of REG_BR_PROB_BASE; passed to loop_version below together
   with its complement.  */
7131 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
7132 tree gimplify_stmt_list;
7134 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
7136 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
7138 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7139 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
7140 &cond_expr_stmt_list);
7143 fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
7145 force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
7147 append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
7149 initialize_original_copy_tables ();
7150 nloop = loop_version (loop, cond_expr, &condition_bb,
7151 prob, prob, REG_BR_PROB_BASE - prob, true);
7152 free_original_copy_tables();
7154 /** Loop versioning violates an assumption we try to maintain during
7155 vectorization - that the loop exit block has a single predecessor.
7156 After versioning, the exit block of both loop versions is the same
7157 basic block (i.e. it has two predecessors). Just in order to simplify
7158 following transformations in the vectorizer, we fix this situation
7159 here by adding a new (empty) block on the exit-edge of the loop,
7160 with the proper loop-exit phis to maintain loop-closed-form. **/
7162 merge_bb = single_exit (loop)->dest;
7163 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
7164 new_exit_bb = split_edge (single_exit (loop));
7165 new_exit_e = single_exit (loop);
7166 e = EDGE_SUCC (new_exit_bb, 0);
/* For each phi in MERGE_BB, create a phi in the new exit block that takes
   the value formerly arriving on E, and make the original phi use the new
   phi's result on E instead.  */
7168 for (orig_phi = phi_nodes (merge_bb); orig_phi;
7169 orig_phi = PHI_CHAIN (orig_phi))
7171 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
7173 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
7174 add_phi_arg (new_phi, arg, new_exit_e);
7175 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
7178 /** end loop-exit-fixes after versioning **/
7180 update_ssa (TODO_update_ssa);
/* The statements computing the condition are inserted before the last
   statement of CONDITION_BB.  */
7181 cond_exp_bsi = bsi_last (condition_bb);
7182 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
7185 /* CHECKME: we wouldn't need this if we called update_ssa once
7187 bitmap_zero (vect_memsyms_to_rename);
7189 /* Peel the loop if there are data refs with unknown alignment.
7190 Only one data ref with unknown store is allowed. */
7192 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
7193 vect_do_peeling_for_alignment (loop_vinfo);
7195 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
7196 compile time constant), or it is a constant that doesn't divide by the
7197 vectorization factor, then an epilog loop needs to be created.
7198 We therefore duplicate the loop: the original loop will be vectorized,
7199 and will compute the first (n/VF) iterations. The second copy of the loop
7200 will remain scalar and will compute the remaining (n%VF) iterations.
7201 (VF is the vectorization factor). */
7203 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7204 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7205 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
7206 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
/* Remaining case - the iteration count is a known multiple of VF: RATIO is
   the constant quotient, built in the type of LOOP_VINFO_NITERS.  */
7208 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
7209 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
7211 /* 1) Make sure the loop header has exactly two entries
7212 2) Make sure we have a preheader basic block. */
7214 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7216 split_edge (loop_preheader_edge (loop));
7218 /* FORNOW: the vectorizer supports only loops whose body consists
7219 of one basic block (header + empty latch). When the vectorizer will
7220 support more involved loop forms, the order by which the BBs are
7221 traversed needs to be reconsidered. */
7223 for (i = 0; i < nbbs; i++)
7225 basic_block bb = bbs[i];
7226 stmt_vec_info stmt_info;
7229 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
7231 if (vect_print_dump_info (REPORT_DETAILS))
7233 fprintf (vect_dump, "------>vectorizing phi: ");
7234 print_generic_expr (vect_dump, phi, TDF_SLIM);
7236 stmt_info = vinfo_for_stmt (phi);
7240 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7241 && !STMT_VINFO_LIVE_P (stmt_info))
7244 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7245 != (unsigned HOST_WIDE_INT) vectorization_factor)
7246 && vect_print_dump_info (REPORT_DETAILS))
7247 fprintf (vect_dump, "multiple-types.");
/* Only induction phis are transformed here; the call passes no iterator,
   no strided-store flag and no SLP node.  */
7249 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
7251 if (vect_print_dump_info (REPORT_DETAILS))
7252 fprintf (vect_dump, "transform phi.");
7253 vect_transform_stmt (phi, NULL, NULL, NULL);
/* No increment in the loop header: SI moves forward inside the body.  The
   visible lines show this happening through bsi_remove; any other advance
   is on lines absent from this listing.  */
7257 for (si = bsi_start (bb); !bsi_end_p (si);)
7259 tree stmt = bsi_stmt (si);
7262 if (vect_print_dump_info (REPORT_DETAILS))
7264 fprintf (vect_dump, "------>vectorizing statement: ");
7265 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7268 stmt_info = vinfo_for_stmt (stmt);
7270 /* vector stmts created in the outer-loop during vectorization of
7271 stmts in an inner-loop may not have a stmt_info, and do not
7272 need to be vectorized. */
7279 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7280 && !STMT_VINFO_LIVE_P (stmt_info))
7286 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
7288 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7289 if (!STMT_SLP_TYPE (stmt_info)
7290 && nunits != (unsigned int) vectorization_factor
7291 && vect_print_dump_info (REPORT_DETAILS))
7292 /* For SLP VF is set according to unrolling factor, and not to
7293 vector size, hence for SLP this print is not valid. */
7294 fprintf (vect_dump, "multiple-types.");
7296 /* SLP. Schedule all the SLP instances when the first SLP stmt is
7298 if (STMT_SLP_TYPE (stmt_info))
7302 slp_scheduled = true;
7304 if (vect_print_dump_info (REPORT_DETAILS))
7305 fprintf (vect_dump, "=== scheduling SLP instances ===");
7307 is_store = vect_schedule_slp (loop_vinfo, nunits);
7309 /* IS_STORE is true if STMT is a store. Stores cannot be of
7310 hybrid SLP type. They are removed in
7311 vect_schedule_slp_instance and their vinfo is destroyed. */
7319 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
7320 if (PURE_SLP_STMT (stmt_info))
7327 /* -------- vectorize statement ------------ */
7328 if (vect_print_dump_info (REPORT_DETAILS))
7329 fprintf (vect_dump, "transform statement.");
7331 strided_store = false;
7332 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
/* NOTE(review): the chain walk below frees each stmt_vec_info and removes
   each statement, the same work vect_remove_stores performs (with the
   statements in a different order).  */
7336 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
7338 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7339 interleaving chain was completed - free all the stores in
7341 tree next = DR_GROUP_FIRST_DR (stmt_info);
7343 stmt_vec_info next_stmt_info;
7347 next_si = bsi_for_stmt (next);
7348 next_stmt_info = vinfo_for_stmt (next);
7349 /* Free the attached stmt_vec_info and remove the stmt. */
7350 ann = stmt_ann (next);
7351 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7352 free (next_stmt_info);
7353 set_stmt_info (ann, NULL);
7354 bsi_remove (&next_si, true);
7357 bsi_remove (&si, true);
7362 /* Free the attached stmt_vec_info and remove the stmt. */
7363 ann = stmt_ann (stmt);
7365 set_stmt_info (ann, NULL);
7366 bsi_remove (&si, true);
/* Rewrite the exit condition so that the vectorized loop iterates RATIO
   times.  */
7374 slpeel_make_loop_iterate_ntimes (loop, ratio);
7376 mark_set_for_renaming (vect_memsyms_to_rename);
7378 /* The memory tags and pointers in vectorized statements need to
7379 have their SSA forms updated. FIXME, why can't this be delayed
7380 until all the loops have been transformed? */
7381 update_ssa (TODO_update_ssa);
7383 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7384 fprintf (vect_dump, "LOOP VECTORIZED.");
7385 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7386 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");