1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation.  The prototypes below
   are forward declarations of static (file-local) functions.  */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *, slp_tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, struct loop*, tree, tree *, tree *, bool, tree, bool *);
53 static tree vect_create_addr_base_for_vector_ref
54 (tree, tree *, tree, struct loop *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree, tree, block_stmt_iterator *);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static int vect_min_worthwhile_factor (enum tree_code);
/* Function cost_for_stmt.

   Return the scalar cost of STMT, chosen by the statement kind recorded in
   its stmt_vec_info (STMT_VINFO_TYPE): TARG_SCALAR_LOAD_COST for loads,
   TARG_SCALAR_STORE_COST for stores, and TARG_SCALAR_STMT_COST for the
   remaining kinds listed in the switch.
   NOTE(review): the handling of undef_vec_info_type is not visible in this
   excerpt - confirm against the full source.  */
76 cost_for_stmt (tree stmt)
78 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
80 switch (STMT_VINFO_TYPE (stmt_info))
82 case load_vec_info_type:
83 return TARG_SCALAR_LOAD_COST;
84 case store_vec_info_type:
85 return TARG_SCALAR_STORE_COST;
86 case op_vec_info_type:
87 case condition_vec_info_type:
88 case assignment_vec_info_type:
89 case reduc_vec_info_type:
90 case induc_vec_info_type:
91 case type_promotion_vec_info_type:
92 case type_demotion_vec_info_type:
93 case type_conversion_vec_info_type:
94 case call_vec_info_type:
95 return TARG_SCALAR_STMT_COST;
96 case undef_vec_info_type:
103 /* Function vect_estimate_min_profitable_iters
105 Return the number of iterations required for the vector version of the
106 loop to be profitable relative to the cost of the scalar version of the
109 TODO: Take profile info into account before making vectorization
110 decisions, if available.

   On the path that reaches the end of the function, the computed value is
   raised to at least the vectorization factor and then decremented by one
   before being returned, because the guard built from it has the form
   niters <= threshold.  */
113 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
116 int min_profitable_iters;
117 int peel_iters_prologue;
118 int peel_iters_epilogue;
119 int vec_inside_cost = 0;
120 int vec_outside_cost = 0;
121 int scalar_single_iter_cost = 0;
122 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
124 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
125 int nbbs = loop->num_nodes;
127 int innerloop_iters = 0, factor;
128 VEC (slp_instance, heap) *slp_instances;
129 slp_instance instance;
131 /* Cost model disabled. */
132 if (!flag_vect_cost_model)
134 if (vect_print_dump_info (REPORT_DETAILS))
135 fprintf (vect_dump, "cost model disabled.");
139 /* Requires loop versioning tests to handle misalignment.
140 FIXME: Make cost depend on number of stmts in may_misalign list. */
142 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
144 vec_outside_cost += TARG_COND_BRANCH_COST;
145 if (vect_print_dump_info (REPORT_DETAILS))
146 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
150 /* Count statements in scalar loop. Using this as scalar cost for a single
153 TODO: Add outer loop support.
155 TODO: Consider assigning different costs to different scalar
160 innerloop_iters = 50; /* FIXME */
162 for (i = 0; i < nbbs; i++)
164 block_stmt_iterator si;
165 basic_block bb = bbs[i];
/* Statements that belong to the inner loop are weighted by the assumed
   inner-loop iteration count (innerloop_iters).  */
167 if (bb->loop_father == loop->inner)
168 factor = innerloop_iters;
172 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
174 tree stmt = bsi_stmt (si);
175 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
176 if (!STMT_VINFO_RELEVANT_P (stmt_info)
177 && !STMT_VINFO_LIVE_P (stmt_info))
179 scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
180 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
181 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
182 some of the "outside" costs are generated inside the outer-loop. */
183 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
187 /* Add additional cost for the peeled instructions in prologue and epilogue
190 FORNOW: If we do not know the value of peel_iters for prologue or epilogue
191 at compile-time - we assume it's (vf-1)/2 (the worst would be vf-1).
193 TODO: Build an expression that represents peel_iters for prologue and
194 epilogue to be used in a run-time test. */
196 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
198 if (byte_misalign < 0)
200 peel_iters_prologue = (vf - 1)/2;
201 if (vect_print_dump_info (REPORT_DETAILS))
202 fprintf (vect_dump, "cost model: "
203 "prologue peel iters set to (vf-1)/2.");
205 /* If peeling for alignment is unknown, loop bound of main loop becomes
207 peel_iters_epilogue = (vf - 1)/2;
208 if (vect_print_dump_info (REPORT_DETAILS))
209 fprintf (vect_dump, "cost model: "
210 "epilogue peel iters set to (vf-1)/2 because "
211 "peeling for alignment is unknown .");
217 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
218 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
219 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
220 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
222 peel_iters_prologue = nelements - (byte_misalign / element_size);
225 peel_iters_prologue = 0;
227 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
229 peel_iters_epilogue = (vf - 1)/2;
230 if (vect_print_dump_info (REPORT_DETAILS))
231 fprintf (vect_dump, "cost model: "
232 "epilogue peel iters set to (vf-1)/2 because "
233 "loop iterations are unknown .");
237 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
238 peel_iters_prologue = niters < peel_iters_prologue ?
239 niters : peel_iters_prologue;
240 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
244 /* Requires a prologue loop when peeling to handle misalignment. Add cost of
245 two guards, one for the peeled loop and one for the vector loop. */
247 if (peel_iters_prologue)
249 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
250 if (vect_print_dump_info (REPORT_DETAILS))
251 fprintf (vect_dump, "cost model: Adding cost of checks for "
255 /* Requires an epilogue loop to finish up remaining iterations after vector
256 loop. Add cost of two guards, one for the peeled loop and one for the
259 if (peel_iters_epilogue
260 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
261 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vf)
263 vec_outside_cost += 2 * TARG_COND_BRANCH_COST;
264 if (vect_print_dump_info (REPORT_DETAILS))
265 fprintf (vect_dump, "cost model : Adding cost of checks for "
269 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
270 + (peel_iters_epilogue * scalar_single_iter_cost);
272 /* Allow targets to add additional (outside-of-loop) costs. FORNOW, the only
273 information we provide for the target is whether testing against the
274 threshold involves a runtime test. */
275 if (targetm.vectorize.builtin_vectorization_cost)
277 bool runtime_test = false;
279 /* If the number of iterations is unknown, or the
280 peeling-for-misalignment amount is unknown, we will have to generate
281 a runtime test to test the loop count against the threshold. */
282 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
283 || (byte_misalign < 0))
286 targetm.vectorize.builtin_vectorization_cost (runtime_test);
287 if (vect_print_dump_info (REPORT_DETAILS))
288 fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
289 targetm.vectorize.builtin_vectorization_cost (runtime_test));
293 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
294 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
296 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
297 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
300 /* Calculate number of iterations required to make the vector version
301 profitable, relative to the loop bodies only. The following condition
302 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
303 SIC = scalar iteration cost, VIC = vector iteration cost,
304 VOC = vector outside cost and VF = vectorization factor. */
306 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
308 if (vec_outside_cost == 0)
309 min_profitable_iters = 1;
312 min_profitable_iters = (vec_outside_cost * vf)
313 / ((scalar_single_iter_cost * vf)
316 if ((scalar_single_iter_cost * vf * min_profitable_iters)
317 <= ((vec_inside_cost * min_profitable_iters)
318 + (vec_outside_cost * vf)))
319 min_profitable_iters++;
322 /* vector version will never be profitable. */
325 if (vect_print_dump_info (REPORT_DETAILS))
326 fprintf (vect_dump, "cost model: vector iteration cost = %d "
327 "is divisible by scalar iteration cost = %d by a factor "
328 "greater than or equal to the vectorization factor = %d .",
329 vec_inside_cost, scalar_single_iter_cost, vf);
333 if (vect_print_dump_info (REPORT_DETAILS))
335 fprintf (vect_dump, "Cost model analysis: \n");
336 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
338 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
340 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
341 fprintf (vect_dump, " prologue iterations: %d\n",
342 peel_iters_prologue);
343 fprintf (vect_dump, " epilogue iterations: %d\n",
344 peel_iters_epilogue);
345 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
346 min_profitable_iters);
347 fprintf (vect_dump, " Actual minimum iters for profitability: %d\n",
348 min_profitable_iters < vf ? vf : min_profitable_iters);
/* Clamp the estimate from below by the vectorization factor.  */
351 min_profitable_iters =
352 min_profitable_iters < vf ? vf : min_profitable_iters;
354 /* Because the condition we create is:
355 if (niters <= min_profitable_iters)
356 then skip the vectorized loop. */
357 min_profitable_iters--;
358 return min_profitable_iters;
362 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
363 functions. Design better to avoid maintenance issues. */
365 /* Function vect_model_reduction_cost.
367 Models cost for a reduction operation, including the vector ops
368 generated within the strip-mine loop, the initial definition before
369 the loop, and the epilogue code that must be generated.

   The inside-of-loop cost of STMT_INFO is increased by NCOPIES vector
   statements.  The outside-of-loop cost of STMT_INFO is set to the cost of
   the initial definition plus the cost of the epilogue.  The epilogue is
   only costed when the reduction statement is not nested in the vectorized
   loop; if REDUC_CODE is less than NUM_TREE_CODES it is costed as one
   vector statement plus a vector-to-scalar extract.  */
372 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
381 enum machine_mode mode;
382 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
383 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
384 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
385 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
387 /* Cost of reduction op inside loop. */
388 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
/* The reduction operand is taken to be the last operand of the operation;
   its scalar type determines the vector type used for costing.  */
390 reduction_op = TREE_OPERAND (operation, op_type-1);
391 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
392 mode = TYPE_MODE (vectype);
393 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
396 orig_stmt = STMT_VINFO_STMT (stmt_info);
398 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
400 /* Add in cost for initial definition. */
401 outer_cost += TARG_SCALAR_TO_VEC_COST;
403 /* Determine cost of epilogue code.
405 We have a reduction operator that will reduce the vector in one statement.
406 Also requires scalar extract. */
408 if (!nested_in_vect_loop_p (loop, orig_stmt))
410 if (reduc_code < NUM_TREE_CODES)
411 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
414 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
416 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
417 int element_bitsize = tree_low_cst (bitsize, 1);
418 int nelements = vec_size_in_bits / element_bitsize;
420 optab = optab_for_tree_code (code, vectype);
422 /* We have a whole vector shift available. */
423 if (VECTOR_MODE_P (mode)
424 && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
425 && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
426 /* Final reduction via vector shifts and the reduction operator. Also
427 requires scalar extract. */
428 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
429 + TARG_VEC_TO_SCALAR_COST);
431 /* Use extracts and reduction op for final reduction. For N elements,
432 we have N extracts and N-1 reduction ops. */
433 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
437 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
439 if (vect_print_dump_info (REPORT_DETAILS))
440 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
441 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
442 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
446 /* Function vect_model_induction_cost.
448 Models cost for induction operations.

   Overwrites both costs recorded in STMT_INFO: the inside-of-loop cost
   becomes NCOPIES vector statements, and the outside-of-loop cost becomes
   two scalar-to-vector operations.  */
451 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
453 /* loop cost for vec_loop. */
454 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
455 /* prologue cost for vec_init and vec_step. */
456 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
458 if (vect_print_dump_info (REPORT_DETAILS))
459 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
460 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
461 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
465 /* Function vect_model_simple_cost.
467 Models cost for simple operations, i.e. those that only emit ncopies of a
468 single op. Right now, this does not account for multiple insns that could
469 be generated for the single vector op. We will handle that shortly.

   The inside-of-loop cost is NCOPIES vector statements.  DT holds the
   definition types of the operands; only DT[0] and DT[1] are examined, and
   each one that is a constant or invariant definition adds one
   scalar-to-vector operation to the outside-of-loop cost.  */
472 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
473 enum vect_def_type *dt, slp_tree slp_node)
476 int inside_cost = 0, outside_cost = 0;
478 inside_cost = ncopies * TARG_VEC_STMT_COST;
480 /* FORNOW: Assuming maximum 2 args per stmts. */
481 for (i = 0; i < 2; i++)
483 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
484 outside_cost += TARG_SCALAR_TO_VEC_COST;
487 if (vect_print_dump_info (REPORT_DETAILS))
488 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
489 "outside_cost = %d .", inside_cost, outside_cost);
491 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
492 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
493 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
497 /* Function vect_cost_strided_group_size
499 For strided load or store, return the group_size only if it is the first
500 load or store of a group, else return 1. This ensures that group size is
501 only returned once per group.  The first statement of the group is
   obtained from DR_GROUP_FIRST_DR (STMT_INFO) and compared with the
   statement of STMT_INFO.  */
504 vect_cost_strided_group_size (stmt_vec_info stmt_info)
506 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
508 if (first_stmt == STMT_VINFO_STMT (stmt_info))
509 return DR_GROUP_SIZE (stmt_info);
515 /* Function vect_model_store_cost
517 Models cost for stores. In the case of strided accesses, one access
518 has the overhead of the strided access attributed to it.

   If DT is a constant or invariant definition, one scalar-to-vector
   operation is charged outside the loop.  The inside-of-loop cost is
   NCOPIES vector stores, plus the permute cost computed below when the
   store belongs to a strided group.  */
521 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
522 enum vect_def_type dt, slp_tree slp_node)
525 int inside_cost = 0, outside_cost = 0;
527 if (dt == vect_constant_def || dt == vect_invariant_def)
528 outside_cost = TARG_SCALAR_TO_VEC_COST;
530 /* Strided access? */
531 if (DR_GROUP_FIRST_DR (stmt_info))
532 group_size = vect_cost_strided_group_size (stmt_info);
533 /* Not a strided access. */
537 /* Is this an access in a group of stores, which provide strided access?
538 If so, add in the cost of the permutes. */
541 /* Uses a high and low interleave operation for each needed permute. */
542 inside_cost = ncopies * exact_log2(group_size) * group_size
543 * TARG_VEC_STMT_COST;
545 if (vect_print_dump_info (REPORT_DETAILS))
546 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
551 /* Costs of the stores. */
552 inside_cost += ncopies * TARG_VEC_STORE_COST;
554 if (vect_print_dump_info (REPORT_DETAILS))
555 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
556 "outside_cost = %d .", inside_cost, outside_cost);
558 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
559 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
560 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
564 /* Function vect_model_load_cost
566 Models cost for loads. In the case of strided accesses, the last access
567 has the overhead of the strided access attributed to it. Since unaligned
568 accesses are supported for loads, we also account for the costs of the
569 access scheme chosen.

   The strided path is only taken when the statement belongs to a group
   (DR_GROUP_FIRST_DR is set) and SLP_NODE is null.
   NOTE(review): the group size comes from vect_cost_strided_group_size
   (STMT_INFO); which access of the group ends up carrying the strided
   overhead should be confirmed against that helper, since the text above
   says it is the last access.  */
572 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
576 int alignment_support_cheme;
578 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
579 int inside_cost = 0, outside_cost = 0;
581 /* Strided accesses? */
582 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
583 if (first_stmt && !slp_node)
585 group_size = vect_cost_strided_group_size (stmt_info);
586 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
588 /* Not a strided access. */
595 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
597 /* Is this an access in a group of loads providing strided access?
598 If so, add in the cost of the permutes. */
601 /* Uses even and odd extract operations for each needed permute. */
602 inside_cost = ncopies * exact_log2(group_size) * group_size
603 * TARG_VEC_STMT_COST;
605 if (vect_print_dump_info (REPORT_DETAILS))
606 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
611 /* The loads themselves. */
612 switch (alignment_support_cheme)
616 inside_cost += ncopies * TARG_VEC_LOAD_COST;
618 if (vect_print_dump_info (REPORT_DETAILS))
619 fprintf (vect_dump, "vect_model_load_cost: aligned.");
623 case dr_unaligned_supported:
625 /* Here, we assign an additional cost for the unaligned load. */
626 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
628 if (vect_print_dump_info (REPORT_DETAILS))
629 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
634 case dr_explicit_realign:
636 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
638 /* FIXME: If the misalignment remains fixed across the iterations of
639 the containing loop, the following cost should be added to the
641 if (targetm.vectorize.builtin_mask_for_load)
642 inside_cost += TARG_VEC_STMT_COST;
646 case dr_explicit_realign_optimized:
648 if (vect_print_dump_info (REPORT_DETAILS))
649 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
652 /* Unaligned software pipeline has a load of an address, an initial
653 load, and possibly a mask operation to "prime" the loop. However,
654 if this is an access in a group of loads, which provide strided
655 access, then the above cost should only be considered for one
656 access in the group. Inside the loop, there is a load op
657 and a realignment op. */
659 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
661 outside_cost = 2*TARG_VEC_STMT_COST;
662 if (targetm.vectorize.builtin_mask_for_load)
663 outside_cost += TARG_VEC_STMT_COST;
666 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
675 if (vect_print_dump_info (REPORT_DETAILS))
676 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
677 "outside_cost = %d .", inside_cost, outside_cost);
679 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
680 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
681 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
685 /* Function vect_get_new_vect_var.
687 Creates a new temporary variable of type TYPE. The current naming scheme uses the
688 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) for
689 the name of vectorizer generated variables, and prepends that prefix to NAME if
693 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
700 case vect_simple_var:
703 case vect_scalar_var:
706 case vect_pointer_var:
715 char* tmp = concat (prefix, name, NULL);
716 new_vect_var = create_tmp_var (type, tmp);
720 new_vect_var = create_tmp_var (type, prefix);
722 /* Mark vector typed variable as a gimple register variable. */
723 if (TREE_CODE (type) == VECTOR_TYPE)
724 DECL_GIMPLE_REG_P (new_vect_var) = true;
730 /* Function vect_create_addr_base_for_vector_ref.
732 Create an expression that computes the address of the first memory location
733 that will be accessed for a data reference.
736 STMT: The statement containing the data reference.
737 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
738 OFFSET: Optional. If supplied, it is multiplied by the size of the type of
   the data reference and then added to the initial address.
739 LOOP: Specify relative to which loop-nest should the address be computed.
740 For example, when the dataref is in an inner-loop nested in an
741 outer-loop that is now being vectorized, LOOP can be either the
742 outer-loop, or the inner-loop. The first memory location accessed
743 by the following dataref ('in' points to short):
750 if LOOP=i_loop: &in (relative to i_loop)
751 if LOOP=j_loop: &in+i*2B (relative to j_loop)
754 1. Return an SSA_NAME whose value is the address of the memory location of
755 the first vector of the data reference.
756 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
757 these statement(s) which define the returned SSA_NAME.
759 FORNOW: We are only handling array accesses with step 1. */
762 vect_create_addr_base_for_vector_ref (tree stmt,
767 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
768 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
769 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
770 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
772 tree data_ref_base_var;
775 tree addr_base, addr_expr;
777 tree base_offset = unshare_expr (DR_OFFSET (dr));
778 tree init = unshare_expr (DR_INIT (dr));
779 tree vect_ptr_type, addr_expr2;
780 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
/* When the address is requested relative to a loop other than the one
   that directly contains STMT, use the base, offset and init recorded in
   STMT_INFO instead of those recorded in the data reference.  */
783 if (loop != containing_loop)
785 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* NOTE(review): this local is named like the LOOP argument tested in the
   condition above and appears to shadow it inside this block - confirm
   against the full parameter list, which is not visible here.  */
786 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
788 gcc_assert (nested_in_vect_loop_p (loop, stmt));
790 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
791 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
792 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
795 /* Create data_ref_base */
796 base_name = build_fold_indirect_ref (data_ref_base);
797 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
798 add_referenced_var (data_ref_base_var);
799 data_ref_base = force_gimple_operand (data_ref_base, &new_base_stmt,
800 true, data_ref_base_var);
801 append_to_statement_list_force(new_base_stmt, new_stmt_list);
803 /* Create base_offset */
804 base_offset = size_binop (PLUS_EXPR, base_offset, init);
805 base_offset = fold_convert (sizetype, base_offset);
806 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
807 add_referenced_var (dest);
808 base_offset = force_gimple_operand (base_offset, &new_stmt, true, dest);
809 append_to_statement_list_force (new_stmt, new_stmt_list);
813 tree tmp = create_tmp_var (sizetype, "offset");
815 add_referenced_var (tmp);
/* Scale OFFSET by the size of the referenced type and fold it into
   base_offset.  */
816 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
817 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
818 base_offset, offset);
819 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
820 append_to_statement_list_force (new_stmt, new_stmt_list);
823 /* base + base_offset */
824 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
825 data_ref_base, base_offset);
827 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
829 /* addr_expr = addr_base */
830 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
831 get_name (base_name));
832 add_referenced_var (addr_expr);
833 vec_stmt = fold_convert (vect_ptr_type, addr_base);
/* NOTE(review): addr_expr is not referenced again in the visible code;
   the converted address is gimplified into addr_expr2 below - confirm
   whether addr_expr is still needed.  */
834 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
835 get_name (base_name));
836 add_referenced_var (addr_expr2);
837 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
838 append_to_statement_list_force (new_stmt, new_stmt_list);
840 if (vect_print_dump_info (REPORT_DETAILS))
842 fprintf (vect_dump, "created ");
843 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
849 /* Function vect_create_data_ref_ptr.
851 Create a new pointer to vector type (vp), that points to the first location
852 accessed in the loop by STMT, along with the def-use update chain to
853 appropriately advance the pointer through the loop iterations. Also set
854 aliasing information for the pointer. This vector pointer is used by the
855 callers to this function to create a memory reference expression for vector
859 1. STMT: a stmt that references memory. Expected to be of the form
860 GIMPLE_MODIFY_STMT <name, data-ref> or
861 GIMPLE_MODIFY_STMT <data-ref, name>.
862 2. AT_LOOP: the loop where the vector memref is to be created.
863 3. OFFSET (optional): an offset to be added to the initial address accessed
864 by the data-ref in STMT.
865 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
866 pointing to the initial address.
867 5. TYPE: if not NULL indicates the required type of the data-ref
870 1. Declare a new ptr to vector_type, and have it point to the base of the
871 data reference (initial address accessed by the data reference).
872 For example, for vector of type V8HI, the following code is generated:
875 vp = (v8hi *)initial_address;
877 if OFFSET is not supplied:
878 initial_address = &a[init];
879 if OFFSET is supplied:
880 initial_address = &a[init + OFFSET];
882 Return the initial_address in INITIAL_ADDRESS.
884 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
885 update the pointer in each iteration of the loop.
887 Return the increment stmt that updates the pointer in PTR_INCR.
889 3. Set INV_P to true if the access pattern of the data reference in the
890 vectorized loop is invariant. Set it to false otherwise.
892 4. Return the pointer. */
895 vect_create_data_ref_ptr (tree stmt, struct loop *at_loop,
896 tree offset, tree *initial_address, tree *ptr_incr,
897 bool only_init, tree type, bool *inv_p)
900 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
901 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
902 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
903 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
904 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
905 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
911 tree new_stmt_list = NULL_TREE;
915 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
917 block_stmt_iterator incr_bsi;
919 tree indx_before_incr, indx_after_incr;
923 /* Check the step (evolution) of the load in LOOP, and record
924 whether it's invariant. */
925 if (nested_in_vect_loop)
926 step = STMT_VINFO_DR_STEP (stmt_info);
928 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
930 if (tree_int_cst_compare (step, size_zero_node) == 0)
935 /* Create an expression for the first address accessed by this load
937 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
939 if (vect_print_dump_info (REPORT_DETAILS))
941 tree data_ref_base = base_name;
942 fprintf (vect_dump, "create vector-pointer variable to type: ");
943 print_generic_expr (vect_dump, vectype, TDF_SLIM);
944 if (TREE_CODE (data_ref_base) == VAR_DECL)
945 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
946 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
947 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
948 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
949 fprintf (vect_dump, " vectorizing a record based array ref: ");
950 else if (TREE_CODE (data_ref_base) == SSA_NAME)
951 fprintf (vect_dump, " vectorizing a pointer ref: ");
952 print_generic_expr (vect_dump, base_name, TDF_SLIM);
955 /** (1) Create the new vector-pointer variable: **/
957 vect_ptr_type = build_pointer_type (type);
959 vect_ptr_type = build_pointer_type (vectype);
960 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
961 get_name (base_name));
962 add_referenced_var (vect_ptr);
964 /** (2) Add aliasing information to the new vector-pointer:
965 (The points-to info (DR_PTR_INFO) may be defined later.) **/
967 tag = DR_SYMBOL_TAG (dr);
970 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
971 tag must be created with tag added to its may alias list. */
973 new_type_alias (vect_ptr, tag, DR_REF (dr));
975 set_symbol_mem_tag (vect_ptr, tag);
977 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
979 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
980 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
981 def-use update cycles for the pointer: One relative to the outer-loop
982 (LOOP), which is what steps (3) and (4) below do. The other is relative
983 to the inner-loop (which is the inner-most loop containing the dataref),
984 and this is done by step (5) below.
986 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
987 inner-most loop, and so steps (3),(4) work the same, and step (5) is
988 redundant. Steps (3),(4) create the following:
991 LOOP: vp1 = phi(vp0,vp2)
997 If there is an inner-loop nested in loop, then step (5) will also be
998 applied, and an additional update in the inner-loop will be created:
1001 LOOP: vp1 = phi(vp0,vp2)
1003 inner: vp3 = phi(vp1,vp4)
1004 vp4 = vp3 + inner_step
1010 /** (3) Calculate the initial address of the vector-pointer, and set
1011 the vector-pointer to point to it before the loop: **/
1013 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1015 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1017 pe = loop_preheader_edge (loop);
1018 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1019 gcc_assert (!new_bb);
1020 *initial_address = new_temp;
1022 /* Create: p = (vectype *) initial_base */
1023 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1024 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
1025 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
1026 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
1027 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1028 gcc_assert (!new_bb);
1031 /** (4) Handle the updating of the vector-pointer inside the loop.
1032 This is needed when ONLY_INIT is false, and also when AT_LOOP
1033 is the inner-loop nested in LOOP (during outer-loop vectorization).
1036 if (only_init && at_loop == loop) /* No update in loop is required. */
1038 /* Copy the points-to information if it exists. */
1039 if (DR_PTR_INFO (dr))
1040 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
1041 vptr = vect_ptr_init;
1045 /* The step of the vector pointer is the Vector Size. */
1046 tree step = TYPE_SIZE_UNIT (vectype);
1047 /* One exception to the above is when the scalar step of the load in
1048 LOOP is zero. In this case the step here is also zero. */
1050 step = size_zero_node;
1052 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1054 create_iv (vect_ptr_init,
1055 fold_convert (vect_ptr_type, step),
1056 NULL_TREE, loop, &incr_bsi, insert_after,
1057 &indx_before_incr, &indx_after_incr);
1058 incr = bsi_stmt (incr_bsi);
1059 set_stmt_info (stmt_ann (incr),
1060 new_stmt_vec_info (incr, loop_vinfo));
1062 /* Copy the points-to information if it exists. */
1063 if (DR_PTR_INFO (dr))
1065 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1066 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1068 merge_alias_info (vect_ptr_init, indx_before_incr);
1069 merge_alias_info (vect_ptr_init, indx_after_incr);
1073 vptr = indx_before_incr;
1076 if (!nested_in_vect_loop || only_init)
1080 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1081 nested in LOOP, if exists: **/
1083 gcc_assert (nested_in_vect_loop);
1086 standard_iv_increment_position (containing_loop, &incr_bsi,
1088 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
1089 containing_loop, &incr_bsi, insert_after, &indx_before_incr,
1091 incr = bsi_stmt (incr_bsi);
1092 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1094 /* Copy the points-to information if it exists. */
1095 if (DR_PTR_INFO (dr))
1097 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1098 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1100 merge_alias_info (vect_ptr_init, indx_before_incr);
1101 merge_alias_info (vect_ptr_init, indx_after_incr);
1105 return indx_before_incr;
1112 /* Function bump_vector_ptr
1114 Increment a pointer (to a vector type) by vector-size. If requested,
1115 i.e. if PTR_INCR is given, then also connect the new increment stmt
1116 to the existing def-use update-chain of the pointer, by modifying
1117 the PTR_INCR as illustrated below:
1119 The pointer def-use update-chain before this function:
1120 DATAREF_PTR = phi (p_0, p_2)
1122 PTR_INCR: p_2 = DATAREF_PTR + step
1124 The pointer def-use update-chain after this function:
1125 DATAREF_PTR = phi (p_0, p_2)
1127 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1129 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1132 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
1134 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1135 the loop. The increment amount across iterations is expected
1137 BSI - location where the new update stmt is to be placed.
1138 STMT - the original scalar memory-access stmt that is being vectorized.
1139 BUMP - optional. The offset by which to bump the pointer. If not given,
1140 the offset is assumed to be vector_size.
1142 Output: Return NEW_DATAREF_PTR as illustrated above.
The new increment stmt is emitted through vect_finish_stmt_generation
using STMT and BSI. When the data reference of STMT carries points-to
information (DR_PTR_INFO), that information is copied onto
NEW_DATAREF_PTR; merge_alias_info is also called on NEW_DATAREF_PTR and
DATAREF_PTR.
1147 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
1148 tree stmt, tree bump)
1150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1151 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1152 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1153 tree vptr_type = TREE_TYPE (dataref_ptr);
1154 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
1155 tree update = TYPE_SIZE_UNIT (vectype);
1158 use_operand_p use_p;
1159 tree new_dataref_ptr;
1164 incr_stmt = build_gimple_modify_stmt (ptr_var,
1165 build2 (POINTER_PLUS_EXPR, vptr_type,
1166 dataref_ptr, update));
1167 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1168 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
1169 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
1171 /* Copy the points-to information if it exists. */
1172 if (DR_PTR_INFO (dr))
1173 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1174 merge_alias_info (new_dataref_ptr, dataref_ptr);
1177 return new_dataref_ptr;
1179 /* Update the vector-pointer's cross-iteration increment. */
1180 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1182 tree use = USE_FROM_PTR (use_p);
1184 if (use == dataref_ptr)
1185 SET_USE (use_p, new_dataref_ptr);
1187 gcc_assert (tree_int_cst_compare (use, update) == 0);
1190 return new_dataref_ptr;
1194 /* Function vect_create_destination_var.
1196 Create a new temporary of type VECTYPE. If VECTYPE is NULL, the
temporary is instead a scalar whose type is that of SCALAR_DEST.
SCALAR_DEST must be an SSA_NAME (asserted); the name obtained from it
via get_name is handed to vect_get_new_vect_var, and the resulting
variable is registered with add_referenced_var. */
1199 vect_create_destination_var (tree scalar_dest, tree vectype)
1202 const char *new_name;
1204 enum vect_var_kind kind;
1206 kind = vectype ? vect_simple_var : vect_scalar_var;
1207 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1209 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1211 new_name = get_name (scalar_dest);
1214 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1215 add_referenced_var (vec_dest);
1221 /* Function vect_init_vector.
1223 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1224 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1225 is not NULL. Otherwise, place the initialization at the loop preheader.
1226 Return the DEF of INIT_STMT.
1227 It will be used in the vectorization of STMT.
The new variable has type VECTOR_TYPE and is created with the cst_ name
prefix. When the initialization is inserted on the loop preheader edge,
the insertion is asserted not to create a new basic block. */
1230 vect_init_vector (tree stmt, tree vector_var, tree vector_type,
1231 block_stmt_iterator *bsi)
1233 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1241 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1242 add_referenced_var (new_var);
1243 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1244 new_temp = make_ssa_name (new_var, init_stmt);
1245 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
1248 vect_finish_stmt_generation (stmt, init_stmt, bsi);
1251 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1252 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1254 if (nested_in_vect_loop_p (loop, stmt))
1256 pe = loop_preheader_edge (loop);
1257 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1258 gcc_assert (!new_bb);
1261 if (vect_print_dump_info (REPORT_DETAILS))
1263 fprintf (vect_dump, "created new init_stmt: ");
1264 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1267 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1272 /* For constant and loop invariant defs of SLP_NODE this function returns
1273 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1274 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1278 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1279 unsigned int op_num)
1281 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1282 tree stmt = VEC_index (tree, stmts, 0);
1283 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1284 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1285 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1288 int j, number_of_places_left_in_vector;
1290 tree op, vop, operation;
1291 int group_size = VEC_length (tree, stmts);
1292 unsigned int vec_num, i;
1293 int number_of_copies = 1;
1294 bool is_store = false;
1295 unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1296 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1298 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1301 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1302 created vectors. It is greater than 1 if unrolling is performed.
1304 For example, we have two scalar operands, s1 and s2 (e.g., group of
1305 strided accesses of size two), while NUNITS is four (i.e., four scalars
1306 of this type can be packed in a vector). The output vector will contain
1307 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1310 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1311 containing the operands.
1313 For example, NUNITS is four as before, and the group size is 8
1314 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1315 {s5, s6, s7, s8}. */
1317 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1319 number_of_places_left_in_vector = nunits;
/* The scalar stmts are visited from the last one to the first one, and
each operand is consed onto the front of the list T. */
1320 for (j = 0; j < number_of_copies; j++)
1322 for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--)
1324 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1328 op = TREE_OPERAND (operation, op_num);
1330 /* Create 'vect_ = {op0,op1,...,opn}'. */
1331 t = tree_cons (NULL_TREE, op, t);
1333 number_of_places_left_in_vector--;
1335 if (number_of_places_left_in_vector == 0)
1337 number_of_places_left_in_vector = nunits;
1339 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1340 vec_cst = build_constructor_from_list (vector_type, t);
1341 VEC_quick_push (tree, voprnds,
1342 vect_init_vector (stmt, vec_cst, vector_type,
1349 /* Since the vectors are created in the reverse order, we should invert
1351 vec_num = VEC_length (tree, voprnds);
1352 for (j = vec_num - 1; j >= 0; j--)
1354 vop = VEC_index (tree, voprnds, j);
1355 VEC_quick_push (tree, *vec_oprnds, vop);
1358 VEC_free (tree, heap, voprnds);
1360 /* In case that VF is greater than the unrolling factor needed for the SLP
1361 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1362 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1363 to replicate the vectors. */
1364 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1366 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1367 VEC_quick_push (tree, *vec_oprnds, vop);
1372 /* Get vectorized definitions from SLP_NODE that contains corresponding
1373 vectorized def-stmts. SLP_NODE must already have vectorized stmts
(asserted); operand 0 of each of them is pushed onto VEC_OPRNDS. */
1376 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1382 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1385 VEC_iterate (tree, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1388 gcc_assert (vec_def_stmt);
1389 vec_oprnd = GIMPLE_STMT_OPERAND (vec_def_stmt, 0);
1390 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1395 /* Get vectorized definitions for SLP_NODE.
1396 If the scalar definitions are loop invariants or constants, collect them and
1397 call vect_get_constant_vectors() to create vector stmts.
1398 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1399 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1400 vect_get_slp_vect_defs() to retrieve them.
*VEC_OPRNDS0 is allocated here with room for
SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_NODE) entries, and so is *VEC_OPRNDS1
when the second operand is processed. */
1403 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1404 VEC (tree,heap) **vec_oprnds1)
1406 tree operation, first_stmt;
1408 /* Allocate memory for vectorized defs. */
1409 *vec_oprnds0 = VEC_alloc (tree, heap,
1410 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1412 /* SLP_NODE corresponds either to a group of stores or to a group of
1413 unary/binary operations. We don't call this function for loads. */
1414 if (SLP_TREE_LEFT (slp_node))
1415 /* The defs are already vectorized. */
1416 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1418 /* Build vectors from scalar defs. */
1419 vect_get_constant_vectors (slp_node, vec_oprnds0, 0);
1421 first_stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1422 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1423 /* Since we don't call this function with loads, this is a group of
1427 operation = GIMPLE_STMT_OPERAND (first_stmt, 1);
1428 if (TREE_OPERAND_LENGTH (operation) == unary_op)
1431 *vec_oprnds1 = VEC_alloc (tree, heap,
1432 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1434 if (SLP_TREE_RIGHT (slp_node))
1435 /* The defs are already vectorized. */
1436 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1438 /* Build vectors from scalar defs. */
1439 vect_get_constant_vectors (slp_node, vec_oprnds1, 1);
1443 /* Function get_initial_def_for_induction
1446 STMT - a stmt that performs an induction operation in the loop.
1447 IV_PHI - the initial value of the induction variable
1450 Return a vector variable, initialized with the first VF values of
1451 the induction variable. E.g., for an iv with IV_PHI='X' and
1452 evolution S, for a vector of 4 units, we want to return:
1453 [X, X + S, X + 2*S, X + 3*S].
NOTE(review): the function takes IV_PHI only, and uses it as a phi node
(its PHI_RESULT and its argument on the latch edge are read); the STMT
entry above has no matching parameter - confirm and update.
The induction phi created here is recorded as the STMT_VINFO_VEC_STMT
of IV_PHI. */
1456 get_initial_def_for_induction (tree iv_phi)
1458 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1459 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1460 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1461 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1462 tree vectype = get_vectype_for_scalar_type (scalar_type);
1463 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1464 edge pe = loop_preheader_edge (loop);
1465 struct loop *iv_loop;
1467 tree vec, vec_init, vec_step, t;
1472 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1473 tree init_expr, step_expr;
1474 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1477 int ncopies = vf / nunits;
1479 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1480 bool nested_in_vect_loop = false;
1482 imm_use_iterator imm_iter;
1483 use_operand_p use_p;
1487 block_stmt_iterator si;
1488 basic_block bb = bb_for_stmt (iv_phi);
1490 gcc_assert (phi_info);
1491 gcc_assert (ncopies >= 1);
1493 /* Find the first insertion point in the BB. */
1494 si = bsi_after_labels (bb);
1496 if (INTEGRAL_TYPE_P (scalar_type))
1497 step_expr = build_int_cst (scalar_type, 0);
1499 step_expr = build_real (scalar_type, dconst0);
1501 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1502 if (nested_in_vect_loop_p (loop, iv_phi))
1504 nested_in_vect_loop = true;
1505 iv_loop = loop->inner;
1509 gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father);
1511 latch_e = loop_latch_edge (iv_loop);
1512 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
1514 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
1515 gcc_assert (access_fn);
1516 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
1517 &init_expr, &step_expr);
1519 pe = loop_preheader_edge (iv_loop);
1521 /* Create the vector that holds the initial_value of the induction. */
1522 if (nested_in_vect_loop)
1524 /* iv_loop is nested in the loop to be vectorized. init_expr had already
1525 been created during vectorization of previous stmts; We obtain it from
1526 the STMT_VINFO_VEC_STMT of the defining stmt. */
1527 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
1528 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
1532 /* iv_loop is the loop to be vectorized. Create:
1533 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1534 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1535 add_referenced_var (new_var);
1537 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1540 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1541 gcc_assert (!new_bb);
1545 t = tree_cons (NULL_TREE, init_expr, t);
1546 for (i = 1; i < nunits; i++)
1550 /* Create: new_name_i = new_name + step_expr */
1551 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1552 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1553 new_name = make_ssa_name (new_var, init_stmt);
1554 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1556 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1557 gcc_assert (!new_bb);
1559 if (vect_print_dump_info (REPORT_DETAILS))
1561 fprintf (vect_dump, "created new init_stmt: ");
1562 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1564 t = tree_cons (NULL_TREE, new_name, t);
1566 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
1567 vec = build_constructor_from_list (vectype, nreverse (t));
1568 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
1572 /* Create the vector that holds the step of the induction. */
1573 if (nested_in_vect_loop)
1574 /* iv_loop is nested in the loop to be vectorized. Generate:
1575 vec_step = [S, S, S, S] */
1576 new_name = step_expr;
1579 /* iv_loop is the loop to be vectorized. Generate:
1580 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1581 expr = build_int_cst (scalar_type, vf);
1582 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1586 for (i = 0; i < nunits; i++)
1587 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1588 vec = build_constructor_from_list (vectype, t);
1589 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1592 /* Create the following def-use cycle:
1597 vec_iv = PHI <vec_init, vec_loop>
1601 vec_loop = vec_iv + vec_step; */
1603 /* Create the induction-phi that defines the induction-operand. */
1604 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1605 add_referenced_var (vec_dest);
1606 induction_phi = create_phi_node (vec_dest, iv_loop->header);
1607 set_stmt_info (get_stmt_ann (induction_phi),
1608 new_stmt_vec_info (induction_phi, loop_vinfo));
1609 induc_def = PHI_RESULT (induction_phi);
1611 /* Create the iv update inside the loop */
1612 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1613 build2 (PLUS_EXPR, vectype,
1614 induc_def, vec_step));
1615 vec_def = make_ssa_name (vec_dest, new_stmt);
1616 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1617 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1618 set_stmt_info (get_stmt_ann (new_stmt),
1619 new_stmt_vec_info (new_stmt, loop_vinfo));
1621 /* Set the arguments of the phi node: */
1622 add_phi_arg (induction_phi, vec_init, pe);
1623 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
1626 /* In case that vectorization factor (VF) is bigger than the number
1627 of elements that we can fit in a vectype (nunits), we have to generate
1628 more than one vector stmt - i.e - we need to "unroll" the
1629 vector stmt by a factor VF/nunits. For more details see documentation
1630 in vectorizable_operation. */
1634 stmt_vec_info prev_stmt_vinfo;
1635 /* FORNOW. This restriction should be relaxed. */
1636 gcc_assert (!nested_in_vect_loop);
1638 /* Create the vector that holds the step of the induction. */
1639 expr = build_int_cst (scalar_type, nunits);
1640 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1642 for (i = 0; i < nunits; i++)
1643 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1644 vec = build_constructor_from_list (vectype, t);
1645 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1647 vec_def = induc_def;
1648 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1649 for (i = 1; i < ncopies; i++)
1653 /* vec_i = vec_prev + vec_step */
1654 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1655 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1656 vec_def = make_ssa_name (vec_dest, new_stmt);
1657 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1658 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1659 set_stmt_info (get_stmt_ann (new_stmt),
1660 new_stmt_vec_info (new_stmt, loop_vinfo));
1661 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1662 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1666 if (nested_in_vect_loop)
1668 /* Find the loop-closed exit-phi of the induction, and record
1669 the final vector of induction results: */
1671 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
1673 if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p))))
1675 exit_phi = USE_STMT (use_p);
1681 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
1682 /* FORNOW. Currently not supporting the case that an inner-loop induction
1683 is not used in the outer-loop (i.e. only outside the outer-loop). */
1684 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
1685 && !STMT_VINFO_LIVE_P (stmt_vinfo));
1687 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
1688 if (vect_print_dump_info (REPORT_DETAILS))
1690 fprintf (vect_dump, "vector of inductions after inner-loop:");
1691 print_generic_expr (vect_dump, new_stmt, TDF_SLIM);
1697 if (vect_print_dump_info (REPORT_DETAILS))
1699 fprintf (vect_dump, "transform induction: created def-use cycle:");
1700 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1701 fprintf (vect_dump, "\n");
1702 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
1705 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1710 /* Function vect_get_vec_def_for_operand.
1712 OP is an operand in STMT. This function returns a (vector) def that will be
1713 used in the vectorized stmt for STMT.
1715 In the case that OP is an SSA_NAME which is defined in the loop, then
1716 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1718 In case OP is an invariant or constant, a new stmt that creates a vector def
1719 needs to be introduced.
OP must satisfy vect_is_simple_use (asserted); the def type that call
reports is what the case labels below are matched against. */
1722 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1727 stmt_vec_info def_stmt_info = NULL;
1728 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1729 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1730 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1731 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1737 enum vect_def_type dt;
1741 if (vect_print_dump_info (REPORT_DETAILS))
1743 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1744 print_generic_expr (vect_dump, op, TDF_SLIM);
1747 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1748 gcc_assert (is_simple_use);
1749 if (vect_print_dump_info (REPORT_DETAILS))
1753 fprintf (vect_dump, "def = ");
1754 print_generic_expr (vect_dump, def, TDF_SLIM);
1758 fprintf (vect_dump, " def_stmt = ");
1759 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1765 /* Case 1: operand is a constant. */
1766 case vect_constant_def:
1771 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1772 if (vect_print_dump_info (REPORT_DETAILS))
1773 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1775 for (i = nunits - 1; i >= 0; --i)
1777 t = tree_cons (NULL_TREE, op, t);
1779 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1780 vec_cst = build_vector (vector_type, t);
1782 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1785 /* Case 2: operand is defined outside the loop - loop invariant. */
1786 case vect_invariant_def:
1791 /* Create 'vec_inv = {inv,inv,..,inv}' */
1792 if (vect_print_dump_info (REPORT_DETAILS))
1793 fprintf (vect_dump, "Create vector_inv.");
1795 for (i = nunits - 1; i >= 0; --i)
1797 t = tree_cons (NULL_TREE, def, t);
1800 /* FIXME: use build_constructor directly. */
1801 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1802 vec_inv = build_constructor_from_list (vector_type, t);
1803 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1806 /* Case 3: operand is defined inside the loop. */
1810 *scalar_def = def_stmt;
1812 /* Get the def from the vectorized stmt. */
1813 def_stmt_info = vinfo_for_stmt (def_stmt);
1814 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1815 gcc_assert (vec_stmt);
1816 if (TREE_CODE (vec_stmt) == PHI_NODE)
1817 vec_oprnd = PHI_RESULT (vec_stmt);
1819 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1823 /* Case 4: operand is defined by a loop header phi - reduction */
1824 case vect_reduction_def:
1828 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1829 loop = (bb_for_stmt (def_stmt))->loop_father;
1831 /* Get the def before the loop */
1832 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1833 return get_initial_def_for_reduction (stmt, op, scalar_def);
1836 /* Case 5: operand is defined by loop-header phi - induction. */
1837 case vect_induction_def:
1839 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1841 /* Get the def from the vectorized stmt. */
1842 def_stmt_info = vinfo_for_stmt (def_stmt);
1843 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1844 gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
1845 vec_oprnd = PHI_RESULT (vec_stmt);
1855 /* Function vect_get_vec_def_for_stmt_copy
1857 Return a vector-def for an operand. This function is used when the
1858 vectorized stmt to be created (by the caller to this function) is a "copy"
1859 created in case the vectorized result cannot fit in one vector, and several
1860 copies of the vector-stmt are required. In this case the vector-def is
1861 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1862 of the stmt that defines VEC_OPRND.
1863 DT is the type of the vector def VEC_OPRND.
1866 In case the vectorization factor (VF) is bigger than the number
1867 of elements that can fit in a vectype (nunits), we have to generate
1868 more than one vector stmt to vectorize the scalar stmt. This situation
1869 arises when there are multiple data-types operated upon in the loop; the
1870 smallest data-type determines the VF, and as a result, when vectorizing
1871 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1872 vector stmt (each computing a vector of 'nunits' results, and together
1873 computing 'VF' results in each iteration). This function is called when
1874 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1875 which VF=16 and nunits=4, so the number of copies required is 4):
1877 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1879 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1880 VS1.1: vx.1 = memref1 VS1.2
1881 VS1.2: vx.2 = memref2 VS1.3
1882 VS1.3: vx.3 = memref3
1884 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1885 VSnew.1: vz1 = vx.1 + ... VSnew.2
1886 VSnew.2: vz2 = vx.2 + ... VSnew.3
1887 VSnew.3: vz3 = vx.3 + ...
1889 The vectorization of S1 is explained in vectorizable_load.
1890 The vectorization of S2:
1891 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1892 the function 'vect_get_vec_def_for_operand' is called to
1893 get the relevant vector-def for each operand of S2. For operand x it
1894 returns the vector-def 'vx.0'.
1896 To create the remaining copies of the vector-stmt (VSnew.j), this
1897 function is called to get the relevant vector-def for each operand. It is
1898 obtained from the respective VS1.j stmt, which is recorded in the
1899 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1901 For example, to obtain the vector-def 'vx.1' in order to create the
1902 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1903 Given 'vx.0' we obtain the stmt that defines it ('VS1.0'); from the
1904 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1905 and return its def ('vx.1').
1906 Overall, to create the above sequence this function will be called 3 times:
1907 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1908 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1909 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1912 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1914 tree vec_stmt_for_operand;
1915 stmt_vec_info def_stmt_info;
1917 /* Do nothing; can reuse same def. */
1918 if (dt == vect_invariant_def || dt == vect_constant_def )
1921 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1922 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1923 gcc_assert (def_stmt_info);
1924 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1925 gcc_assert (vec_stmt_for_operand);
1926 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1931 /* Get vectorized definitions for the operands to create a copy of an original
1932 stmt. See vect_get_vec_def_for_stmt_copy() for details.
The last def in an operand vector is popped, advanced to the def of the
next copy using the matching entry of DT (DT[0] for VEC_OPRNDS0, DT[1]
for VEC_OPRNDS1), and pushed back. */
1935 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1936 VEC(tree,heap) **vec_oprnds0,
1937 VEC(tree,heap) **vec_oprnds1)
1939 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1941 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1942 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1946 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1947 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1948 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1953 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL.
For SLP_NODE the work is delegated to vect_get_slp_defs. Otherwise an
operand vector is allocated with room for one def, which is obtained
from vect_get_vec_def_for_operand for STMT. */
1956 vect_get_vec_defs (tree op0, tree op1, tree stmt, VEC(tree,heap) **vec_oprnds0,
1957 VEC(tree,heap) **vec_oprnds1, slp_tree slp_node)
1960 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
1965 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
1966 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
1967 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1971 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
1972 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
1973 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1979 /* Function vect_finish_stmt_generation.
1981 Insert a new stmt.
VEC_STMT is inserted before the position of BSI, which must point at
STMT (asserted on entry and again after the insertion); STMT must not
be a LABEL_EXPR. A new stmt_vec_info is attached to VEC_STMT, and
VEC_STMT is given the source location of STMT. */
1984 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
1985 block_stmt_iterator *bsi)
1987 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1988 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1990 gcc_assert (stmt == bsi_stmt (*bsi));
1991 gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);
1993 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1995 set_stmt_info (get_stmt_ann (vec_stmt),
1996 new_stmt_vec_info (vec_stmt, loop_vinfo));
1998 if (vect_print_dump_info (REPORT_DETAILS))
2000 fprintf (vect_dump, "add new stmt: ");
2001 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2004 /* Make sure bsi points to the stmt that is being vectorized. */
2005 gcc_assert (stmt == bsi_stmt (*bsi));
2007 #ifdef USE_MAPPED_LOCATION
2008 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
2010 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
2015 /* Function get_initial_def_for_reduction
2018 STMT - a stmt that performs a reduction operation in the loop.
2019 INIT_VAL - the initial value of the reduction variable
2022 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2023 of the reduction (used for adjusting the epilog - see below).
2024 Return a vector variable, initialized according to the operation that STMT
2025 performs. This vector will be used as the initial value of the
2026 vector of partial results.
2028 Option1 (adjust in epilog): Initialize the vector as follows:
2031 min/max: [init_val,init_val,..,init_val,init_val]
2032 bit and/or: [init_val,init_val,..,init_val,init_val]
2033 and when necessary (e.g. add/mult case) let the caller know
2034 that it needs to adjust the result by init_val.
2036 Option2: Initialize the vector as follows:
2037 add: [0,0,...,0,init_val]
2038 mult: [1,1,...,1,init_val]
2039 min/max: [init_val,init_val,...,init_val]
2040 bit and/or: [init_val,init_val,...,init_val]
2041 and no adjustments are needed.
2043 For example, for the following code:
2049 STMT is 's = s + a[i]', and the reduction variable is 's'.
2050 For a vector of 4 units, we want to return either [0,0,0,init_val],
2051 or [0,0,0,0] and let the caller know that it needs to adjust
2052 the result at the end by 'init_val'.
2054 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2055 initialization vector is simpler (same element in all entries).
2056 A cost model should help decide between these two schemes.
The type of INIT_VAL must be an integral or a scalar floating-point
type (asserted). */
2059 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
2061 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2062 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2063 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2064 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2065 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2066 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2067 tree type = TREE_TYPE (init_val);
2074 bool nested_in_vect_loop = false;
2076 gcc_assert (INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
2077 if (nested_in_vect_loop_p (loop, stmt))
2078 nested_in_vect_loop = true;
2080 gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);
2082 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
2086 case WIDEN_SUM_EXPR:
2089 if (nested_in_vect_loop)
2090 *adjustment_def = vecdef;
2092 *adjustment_def = init_val;
2093 /* Create a vector of zeros for init_def. */
2094 if (INTEGRAL_TYPE_P (type))
2095 def_for_init = build_int_cst (type, 0);
2097 def_for_init = build_real (type, dconst0);
2098 for (i = nunits - 1; i >= 0; --i)
2099 t = tree_cons (NULL_TREE, def_for_init, t);
2100 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
2101 init_def = build_vector (vector_type, t);
2106 *adjustment_def = NULL_TREE;
2118 /* Function vect_create_epilog_for_reduction
2120 Create code at the loop-epilog to finalize the result of a reduction
2123 VECT_DEF is a vector of partial results.
2124 REDUC_CODE is the tree-code for the epilog reduction.
2125 STMT is the scalar reduction stmt that is being vectorized.
2126 REDUCTION_PHI is the phi-node that carries the reduction computation.
2129 1. Creates the reduction def-use cycle: sets the arguments for
2131 The loop-entry argument is the vectorized initial-value of the reduction.
2132 The loop-latch argument is VECT_DEF - the vector of partial sums.
2133 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2134 by applying the operation specified by REDUC_CODE if available, or by
2135 other means (whole-vector shifts or a scalar loop).
2136 The function also creates a new phi node at the loop exit to preserve
2137 loop-closed form, as illustrated below.
2139 The flow at the entry to this function:
2142 vec_def = phi <null, null> # REDUCTION_PHI
2143 VECT_DEF = vector_stmt # vectorized form of STMT
2144 s_loop = scalar_stmt # (scalar) STMT
2146 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2150 The above is transformed by this function into:
2153 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2154 VECT_DEF = vector_stmt # vectorized form of STMT
2155 s_loop = scalar_stmt # (scalar) STMT
2157 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2158 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2159 v_out2 = reduce <v_out1>
2160 s_out3 = extract_field <v_out2, 0>
2161 s_out4 = adjust_result <s_out3>
2167 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
2168 enum tree_code reduc_code, tree reduction_phi)
2170 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2172 enum machine_mode mode;
2173 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2174 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2175 basic_block exit_bb;
2179 block_stmt_iterator exit_bsi;
2181 tree new_temp = NULL_TREE;
2183 tree epilog_stmt = NULL_TREE;
2184 tree new_scalar_dest, exit_phi, new_dest;
2185 tree bitsize, bitpos, bytesize;
/* CODE starts out as the tree-code of STMT's right-hand side; for a
   reduction pattern it is replaced further down by the tree-code of the
   original stmt.  */
2186 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2187 tree adjustment_def;
2188 tree vec_initial_def;
2190 imm_use_iterator imm_iter;
2191 use_operand_p use_p;
2192 bool extract_scalar_result = false;
2193 tree reduction_op, expr;
2196 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
2197 bool nested_in_vect_loop = false;
2199 VEC(tree,heap) *phis = NULL;
/* Remember what nested_in_vect_loop_p reports for STMT; when it holds, the
   scalar-extraction steps below are skipped.  */
2202 if (nested_in_vect_loop_p (loop, stmt))
2205 nested_in_vect_loop = true;
/* The reduction operand is the last operand of the operation; its scalar
   type selects the vector type and machine mode used for the epilog.  */
2208 op_type = TREE_OPERAND_LENGTH (operation);
2209 reduction_op = TREE_OPERAND (operation, op_type-1);
2210 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2211 mode = TYPE_MODE (vectype);
2213 /*** 1. Create the reduction def-use cycle ***/
2215 /* 1.1 set the loop-entry arg of the reduction-phi: */
2216 /* For the case of reduction, vect_get_vec_def_for_operand returns
2217 the scalar def before the loop, that defines the initial value
2218 of the reduction variable. */
2219 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
2221 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
2223 /* 1.2 set the loop-latch arg for the reduction-phi: */
2224 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
2226 if (vect_print_dump_info (REPORT_DETAILS))
2228 fprintf (vect_dump, "transform reduction: created def-use cycle:");
2229 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
2230 fprintf (vect_dump, "\n");
2231 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
2235 /*** 2. Create epilog code
2236 The reduction epilog code operates across the elements of the vector
2237 of partial results computed by the vectorized loop.
2238 The reduction epilog code consists of:
2239 step 1: compute the scalar result in a vector (v_out2)
2240 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2241 step 3: adjust the scalar result (s_out3) if needed.
2243 Step 1 can be accomplished using one of the following three schemes:
2244 (scheme 1) using reduc_code, if available.
2245 (scheme 2) using whole-vector shifts, if available.
2246 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2249 The overall epilog code looks like this:
2251 s_out0 = phi <s_loop> # original EXIT_PHI
2252 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2253 v_out2 = reduce <v_out1> # step 1
2254 s_out3 = extract_field <v_out2, 0> # step 2
2255 s_out4 = adjust_result <s_out3> # step 3
2257 (step 3 is optional, and steps 1 and 2 may be combined).
2258 Lastly, the uses of s_out0 are replaced by s_out4.
2262 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2263 v_out1 = phi <v_loop> */
2265 exit_bb = single_exit (loop)->dest;
2266 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
2267 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
2268 exit_bsi = bsi_after_labels (exit_bb);
2270 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2271 (i.e. when reduc_code is not available) and in the final adjustment
2272 code (if needed). Also get the original scalar reduction variable as
2273 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
2274 represents a reduction pattern), the tree-code and scalar-def are
2275 taken from the original stmt that the pattern-stmt (STMT) replaces.
2276 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2277 are taken from STMT. */
2279 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2282 /* Regular reduction */
2287 /* Reduction pattern */
2288 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
2289 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2290 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2292 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2293 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
2294 scalar_type = TREE_TYPE (scalar_dest);
2295 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
2296 bitsize = TYPE_SIZE (scalar_type);
2297 bytesize = TYPE_SIZE_UNIT (scalar_type);
2300 /* In case this is a reduction in an inner-loop while vectorizing an outer
2301 loop - we don't need to extract a single scalar result at the end of the
2302 inner-loop. The final vector of partial results will be used in the
2303 vectorized outer-loop, or reduced to a scalar result at the end of the
2305 if (nested_in_vect_loop)
2306 goto vect_finalize_reduction;
2308 /* 2.3 Create the reduction code, using one of the three schemes described
2311 if (reduc_code < NUM_TREE_CODES)
2315 /*** Case 1: Create:
2316 v_out2 = reduc_expr <v_out1> */
2318 if (vect_print_dump_info (REPORT_DETAILS))
2319 fprintf (vect_dump, "Reduce using direct vector reduction.");
2321 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2322 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
2323 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2324 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2325 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2326 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2328 extract_scalar_result = true;
2332 enum tree_code shift_code = 0;
2333 bool have_whole_vector_shift = true;
2335 int element_bitsize = tree_low_cst (bitsize, 1);
2336 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
/* Probe the target for a whole-vector right shift on MODE; without one the
   shift scheme is ruled out.  */
2339 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
2340 shift_code = VEC_RSHIFT_EXPR;
2342 have_whole_vector_shift = false;
2344 /* Regardless of whether we have a whole vector shift, if we're
2345 emulating the operation via tree-vect-generic, we don't want
2346 to use it. Only the first round of the reduction is likely
2347 to still be profitable via emulation. */
2348 /* ??? It might be better to emit a reduction tree code here, so that
2349 tree-vect-generic can expand the first round via bit tricks. */
2350 if (!VECTOR_MODE_P (mode))
2351 have_whole_vector_shift = false;
/* The shift scheme also applies CODE to whole vectors, so the target must
   provide that operation for MODE as well.  */
2354 optab optab = optab_for_tree_code (code, vectype);
2355 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
2356 have_whole_vector_shift = false;
2359 if (have_whole_vector_shift)
2361 /*** Case 2: Create:
2362 for (offset = VS/2; offset >= element_size; offset/=2)
2364 Create: va' = vec_shift <va, offset>
2365 Create: va = vop <va, va'>
2368 if (vect_print_dump_info (REPORT_DETAILS))
2369 fprintf (vect_dump, "Reduce using vector shifts");
2371 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2372 new_temp = PHI_RESULT (new_phi);
2374 for (bit_offset = vec_size_in_bits/2;
2375 bit_offset >= element_bitsize;
2378 tree bitpos = size_int (bit_offset);
2379 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
2380 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2381 new_name = make_ssa_name (vec_dest, epilog_stmt);
2382 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2383 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2385 tmp = build2 (code, vectype, new_name, new_temp);
2386 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2387 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2388 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2389 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2392 extract_scalar_result = true;
2398 /*** Case 3: Create:
2399 s = extract_field <v_out2, 0>
2400 for (offset = element_size;
2401 offset < vector_size;
2402 offset += element_size;)
2404 Create: s' = extract_field <v_out2, offset>
2405 Create: s = op <s, s'>
2408 if (vect_print_dump_info (REPORT_DETAILS))
2409 fprintf (vect_dump, "Reduce using scalar code. ");
2411 vec_temp = PHI_RESULT (new_phi);
2412 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2413 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2415 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2416 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2417 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2418 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2419 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2421 for (bit_offset = element_bitsize;
2422 bit_offset < vec_size_in_bits;
2423 bit_offset += element_bitsize)
2426 tree bitpos = bitsize_int (bit_offset);
2427 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2430 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2431 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2432 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
2433 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2434 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2436 tmp = build2 (code, scalar_type, new_name, new_temp);
2437 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
2438 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2439 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2440 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2443 extract_scalar_result = false;
2447 /* 2.4 Extract the final scalar result. Create:
2448 s_out3 = extract_field <v_out2, bitpos> */
2450 if (extract_scalar_result)
2454 gcc_assert (!nested_in_vect_loop);
2455 if (vect_print_dump_info (REPORT_DETAILS))
2456 fprintf (vect_dump, "extract scalar result");
/* On big-endian targets the result is read from the last element of the
   vector (bit position (nunits - 1) * element size); otherwise from bit
   position zero.  */
2458 if (BYTES_BIG_ENDIAN)
2459 bitpos = size_binop (MULT_EXPR,
2460 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
2461 TYPE_SIZE (scalar_type));
2463 bitpos = bitsize_zero_node;
2465 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
2466 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2467 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2468 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2469 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2470 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2473 vect_finalize_reduction:
2475 /* 2.5 Adjust the final result by the initial value of the reduction
2476 variable. (When such adjustment is not needed, then
2477 'adjustment_def' is zero). For example, if code is PLUS we create:
2478 new_temp = loop_exit_def + adjustment_def */
2482 if (nested_in_vect_loop)
2484 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
2485 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
2486 new_dest = vect_create_destination_var (scalar_dest, vectype);
2490 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
2491 expr = build2 (code, scalar_type, new_temp, adjustment_def);
2492 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
2494 epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
2495 new_temp = make_ssa_name (new_dest, epilog_stmt);
2496 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2497 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2501 /* 2.6 Handle the loop-exit phi */
2503 /* Replace uses of s_out0 with uses of the final result (s_out4, or
2504 s_out3 when no adjustment was applied): Find the loop-closed-use at the loop exit of the original scalar result.
2505 (The reduction result is expected to have two immediate uses - one at the
2506 latch block, and one at the loop exit). */
/* Collect every stmt outside LOOP that uses SCALAR_DEST.
   NOTE(review): PHIS is reserved for 10 entries and filled with
   VEC_quick_push, which presumably does not grow the vector - confirm that
   more than 10 out-of-loop uses cannot occur, or use a growing push.  */
2507 phis = VEC_alloc (tree, heap, 10);
2508 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
2510 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
2512 exit_phi = USE_STMT (use_p);
2513 VEC_quick_push (tree, phis, exit_phi);
2516 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
2517 gcc_assert (!VEC_empty (tree, phis));
2519 for (i = 0; VEC_iterate (tree, phis, i, exit_phi); i++)
2521 if (nested_in_vect_loop)
2523 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
2525 /* FORNOW. Currently not supporting the case that an inner-loop reduction
2526 is not used in the outer-loop (but only outside the outer-loop). */
2527 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2528 && !STMT_VINFO_LIVE_P (stmt_vinfo));
/* Record the vectorized def of the exit phi: the adjustment stmt when
   ADJUSTMENT_DEF is set, otherwise the new vector exit phi itself.  */
2530 epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
2531 STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
2532 set_stmt_info (get_stmt_ann (epilog_stmt),
2533 new_stmt_vec_info (epilog_stmt, loop_vinfo));
2537 /* Replace the uses: */
2538 orig_name = PHI_RESULT (exit_phi);
2539 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
2540 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2541 SET_USE (use_p, new_temp);
2543 VEC_free (tree, heap, phis);
2547 /* Function vectorizable_reduction.
2549 Check if STMT performs a reduction operation that can be vectorized.
2550 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2551 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2552 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2554 This function also handles reduction idioms (patterns) that have been
2555 recognized in advance during vect_pattern_recog. In this case, STMT may be
2557 X = pattern_expr (arg0, arg1, ..., X)
2558 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
2559 sequence that had been detected and replaced by the pattern-stmt (STMT).
2561 In some cases of reduction patterns, the type of the reduction variable X is
2562 different than the type of the other arguments of STMT.
2563 In such cases, the vectype that is used when transforming STMT into a vector
2564 stmt is different than the vectype that is used to determine the
2565 vectorization factor, because it consists of a different number of elements
2566 than the actual number of elements that are being operated upon in parallel.
2568 For example, consider an accumulation of shorts into an int accumulator.
2569 On some targets it's possible to vectorize this pattern operating on 8
2570 shorts at a time (hence, the vectype for purposes of determining the
2571 vectorization factor should be V8HI); on the other hand, the vectype that
2572 is used to create the vector form is actually V4SI (the type of the result).
2574 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2575 indicates what is the actual level of parallelism (V8HI in the example), so
2576 that the right vectorization factor would be derived. This vectype
2577 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2578 be used to create the vectorized stmt. The right vectype for the vectorized
2579 stmt is obtained from the type of the result X:
2580 get_vectype_for_scalar_type (TREE_TYPE (X))
2582 This means that, contrary to "regular" reductions (or "regular" stmts in
2583 general), the following equation:
2584 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2585 does *NOT* necessarily hold for reduction patterns. */
2588 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2593 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2594 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2595 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2596 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2597 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2599 enum tree_code code, orig_code, epilog_reduc_code = 0;
2600 enum machine_mode vec_mode;
2602 optab optab, reduc_optab;
2603 tree new_temp = NULL_TREE;
2605 enum vect_def_type dt;
2610 stmt_vec_info orig_stmt_info;
2611 tree expr = NULL_TREE;
2613 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
/* NCOPIES is the number of vector stmts generated for STMT: the
   vectorization factor divided by the number of elements in VECTYPE.  */
2614 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2615 stmt_vec_info prev_stmt_info;
2617 tree new_stmt = NULL_TREE;
2620 if (nested_in_vect_loop_p (loop, stmt))
2623 /* FORNOW. This restriction should be relaxed. */
2626 if (vect_print_dump_info (REPORT_DETAILS))
2627 fprintf (vect_dump, "multiple types in nested loop.");
2632 gcc_assert (ncopies >= 1);
2634 /* FORNOW: SLP not supported. */
2635 if (STMT_SLP_TYPE (stmt_info))
2638 /* 1. Is vectorizable reduction? */
2640 /* Not supportable if the reduction variable is used in the loop. */
2641 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
2644 /* Reductions that are not used even in an enclosing outer-loop,
2645 are expected to be "live" (used out of the loop). */
2646 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
2647 && !STMT_VINFO_LIVE_P (stmt_info))
2650 /* Make sure it was already recognized as a reduction computation. */
2651 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2654 /* 2. Has this been recognized as a reduction pattern?
2656 Check if STMT represents a pattern that has been recognized
2657 in earlier analysis stages. For stmts that represent a pattern,
2658 the STMT_VINFO_RELATED_STMT field records the last stmt in
2659 the original sequence that constitutes the pattern. */
2661 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2664 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2665 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2666 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2667 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2670 /* 3. Check the operands of the operation. The first operands are defined
2671 inside the loop body. The last operand is the reduction variable,
2672 which is defined by the loop-header-phi. */
2674 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2676 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2677 code = TREE_CODE (operation);
2678 op_type = TREE_OPERAND_LENGTH (operation);
2679 if (op_type != binary_op && op_type != ternary_op)
2681 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2682 scalar_type = TREE_TYPE (scalar_dest);
2684 /* All uses but the last are expected to be defined in the loop.
2685 The last use is the reduction variable. */
2686 for (i = 0; i < op_type-1; i++)
2688 op = TREE_OPERAND (operation, i);
2689 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2690 gcc_assert (is_simple_use);
2691 if (dt != vect_loop_def
2692 && dt != vect_invariant_def
2693 && dt != vect_constant_def
2694 && dt != vect_induction_def)
2698 op = TREE_OPERAND (operation, i);
2699 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2700 gcc_assert (is_simple_use);
2701 gcc_assert (dt == vect_reduction_def);
2702 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2704 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2706 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2708 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2711 /* 4. Supportable by target? */
2713 /* 4.1. check support for the operation in the loop */
2714 optab = optab_for_tree_code (code, vectype);
2717 if (vect_print_dump_info (REPORT_DETAILS))
2718 fprintf (vect_dump, "no optab.");
2721 vec_mode = TYPE_MODE (vectype);
2722 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
2724 if (vect_print_dump_info (REPORT_DETAILS))
2725 fprintf (vect_dump, "op not supported by target.");
2726 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2727 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2728 < vect_min_worthwhile_factor (code))
2730 if (vect_print_dump_info (REPORT_DETAILS))
2731 fprintf (vect_dump, "proceeding using word mode.");
2734 /* Worthwhile without SIMD support? */
2735 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2736 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2737 < vect_min_worthwhile_factor (code))
2739 if (vect_print_dump_info (REPORT_DETAILS))
2740 fprintf (vect_dump, "not worthwhile without SIMD support.");
2744 /* 4.2. Check support for the epilog operation.
2746 If STMT represents a reduction pattern, then the type of the
2747 reduction variable may be different than the type of the rest
2748 of the arguments. For example, consider the case of accumulation
2749 of shorts into an int accumulator; The original code:
2750 S1: int_a = (int) short_a;
2751 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2754 STMT: int_acc = widen_sum <short_a, int_acc>
2757 1. The tree-code that is used to create the vector operation in the
2758 epilog code (that reduces the partial results) is not the
2759 tree-code of STMT, but is rather the tree-code of the original
2760 stmt from the pattern that STMT is replacing. I.e, in the example
2761 above we want to use 'widen_sum' in the loop, but 'plus' in the
2763 2. The type (mode) we use to check available target support
2764 for the vector operation to be created in the *epilog*, is
2765 determined by the type of the reduction variable (in the example
2766 above we'd check this: plus_optab[vect_int_mode]).
2767 However the type (mode) we use to check available target support
2768 for the vector operation to be created *inside the loop*, is
2769 determined by the type of the other arguments to STMT (in the
2770 example we'd check this: widen_sum_optab[vect_short_mode]).
2772 This is contrary to "regular" reductions, in which the types of all
2773 the arguments are the same as the type of the reduction variable.
2774 For "regular" reductions we can therefore use the same vector type
2775 (and also the same tree-code) when generating the epilog code and
2776 when generating the code inside the loop. */
2780 /* This is a reduction pattern: get the vectype from the type of the
2781 reduction variable, and get the tree-code from orig_stmt. */
2782 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2783 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2784 vec_mode = TYPE_MODE (vectype);
2788 /* Regular reduction: the same vectype and tree-code as used for
2789 the vector code inside the loop can be used for the epilog code. */
/* Look up the vector reduction tree-code that corresponds to ORIG_CODE.
   When no optab exists for it, or the target has no insn for VEC_MODE,
   NUM_TREE_CODES is recorded in EPILOG_REDUC_CODE instead; that value is
   what gets passed on to the epilog generation below.  */
2793 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2795 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2798 if (vect_print_dump_info (REPORT_DETAILS))
2799 fprintf (vect_dump, "no optab for reduction.");
2800 epilog_reduc_code = NUM_TREE_CODES;
2802 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
2804 if (vect_print_dump_info (REPORT_DETAILS))
2805 fprintf (vect_dump, "reduc op not supported by target.");
2806 epilog_reduc_code = NUM_TREE_CODES;
2809 if (!vec_stmt) /* transformation not required. */
2811 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2812 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
2818 if (vect_print_dump_info (REPORT_DETAILS))
2819 fprintf (vect_dump, "transform reduction.");
2821 /* Create the destination vector */
2822 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2824 /* Create the reduction-phi that defines the reduction-operand. */
2825 new_phi = create_phi_node (vec_dest, loop->header);
2827 /* In case the vectorization factor (VF) is bigger than the number
2828 of elements that we can fit in a vectype (nunits), we have to generate
2829 more than one vector stmt - i.e - we need to "unroll" the
2830 vector stmt by a factor VF/nunits. For more details see documentation
2831 in vectorizable_operation. */
2833 prev_stmt_info = NULL;
2834 for (j = 0; j < ncopies; j++)
2839 op = TREE_OPERAND (operation, 0);
2840 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2841 if (op_type == ternary_op)
2843 op = TREE_OPERAND (operation, 1);
2844 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2847 /* Get the vector def for the reduction variable from the phi node */
2848 reduc_def = PHI_RESULT (new_phi);
2852 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2853 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2854 if (op_type == ternary_op)
2855 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2857 /* Get the vector def for the reduction variable from the vectorized
2858 reduction operation generated in the previous iteration (j-1) */
2859 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2862 /* Arguments are ready. create the new vector stmt. */
2863 if (op_type == binary_op)
2864 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2866 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2868 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2869 new_temp = make_ssa_name (vec_dest, new_stmt);
2870 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2871 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Record the copy: one assignment publishes NEW_STMT as the vectorized
   stmt of STMT (and through VEC_STMT), the other links it to the previous
   copy via STMT_VINFO_RELATED_STMT.  NOTE(review): presumably the first
   applies to copy 0 and the second to later copies - confirm.  */
2874 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2876 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2877 prev_stmt_info = vinfo_for_stmt (new_stmt);
2880 /* Finalize the reduction-phi (set its arguments) and create the
2881 epilog reduction code. */
2882 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2886 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2887 a function declaration if the target has a vectorized version
2888 of the function, or NULL_TREE if the function cannot be vectorized. */
2891 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
/* FNDECL is the callee declaration of CALL as reported by
   get_callee_fndecl.  */
2893 tree fndecl = get_callee_fndecl (call);
2894 enum built_in_function code;
2896 /* We only handle functions that do not read or clobber memory -- i.e.
2897 const or novops ones. */
2898 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2902 || TREE_CODE (fndecl) != FUNCTION_DECL
2903 || !DECL_BUILT_IN (fndecl))
/* Hand the built-in's function code and the vector types to the target
   hook, whose result is returned to the caller.  */
2906 code = DECL_FUNCTION_CODE (fndecl);
2907 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2911 /* Function vectorizable_call.
2913 Check if STMT performs a function call that can be vectorized.
2914 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2915 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2916 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2919 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2925 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2926 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2927 tree vectype_out, vectype_in;
2930 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2931 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2932 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2933 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2935 int ncopies, j, nargs;
2936 call_expr_arg_iterator iter;
2938 enum { NARROW, NONE, WIDEN } modifier;
2940 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2943 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
2946 /* FORNOW: SLP not supported. */
2947 if (STMT_SLP_TYPE (stmt_info))
2950 /* FORNOW: not yet supported. */
2951 if (STMT_VINFO_LIVE_P (stmt_info))
2953 if (vect_print_dump_info (REPORT_DETAILS))
2954 fprintf (vect_dump, "value used after loop.");
2958 /* Is STMT a vectorizable call? */
2959 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
2962 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
2965 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2966 if (TREE_CODE (operation) != CALL_EXPR)
2969 /* Process function arguments. */
2970 rhs_type = NULL_TREE;
2972 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
2974 /* Bail out if the function has more than two arguments, we
2975 do not have interesting builtin functions to vectorize with
2976 more than two arguments. */
2980 /* We can only handle calls with arguments of the same type. */
2982 && rhs_type != TREE_TYPE (op))
2984 if (vect_print_dump_info (REPORT_DETAILS))
2985 fprintf (vect_dump, "argument types differ.");
2988 rhs_type = TREE_TYPE (op);
2990 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
2992 if (vect_print_dump_info (REPORT_DETAILS))
2993 fprintf (vect_dump, "use not simple.");
3000 /* No arguments is also not good. */
3004 vectype_in = get_vectype_for_scalar_type (rhs_type);
3005 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3007 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
3008 vectype_out = get_vectype_for_scalar_type (lhs_type);
3009 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3012 if (nunits_in == nunits_out / 2)
3014 else if (nunits_out == nunits_in)
3016 else if (nunits_out == nunits_in / 2)
3021 /* For now, we only vectorize functions if a target specific builtin
3022 is available. TODO -- in some cases, it might be profitable to
3023 insert the calls for pieces of the vector, in order to be able
3024 to vectorize other operations in the loop. */
3025 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
3026 if (fndecl == NULL_TREE)
3028 if (vect_print_dump_info (REPORT_DETAILS))
3029 fprintf (vect_dump, "function is not vectorizable.");
3034 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
3036 if (modifier == NARROW)
3037 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3039 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3041 /* Sanity check: make sure that at least one copy of the vectorized stmt
3042 needs to be generated. */
3043 gcc_assert (ncopies >= 1);
3045 /* FORNOW. This restriction should be relaxed. */
3046 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3048 if (vect_print_dump_info (REPORT_DETAILS))
3049 fprintf (vect_dump, "multiple types in nested loop.");
3053 if (!vec_stmt) /* transformation not required. */
3055 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3056 if (vect_print_dump_info (REPORT_DETAILS))
3057 fprintf (vect_dump, "=== vectorizable_call ===");
3058 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3064 if (vect_print_dump_info (REPORT_DETAILS))
3065 fprintf (vect_dump, "transform operation.");
3067 /* FORNOW. This restriction should be relaxed. */
3068 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3070 if (vect_print_dump_info (REPORT_DETAILS))
3071 fprintf (vect_dump, "multiple types in nested loop.");
3076 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3077 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3079 prev_stmt_info = NULL;
3083 for (j = 0; j < ncopies; ++j)
3085 /* Build argument list for the vectorized call. */
3086 /* FIXME: Rewrite this so that it doesn't
3087 construct a temporary list. */
3090 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3094 = vect_get_vec_def_for_operand (op, stmt, NULL);
3097 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3099 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3103 vargs = nreverse (vargs);
3105 rhs = build_function_call_expr (fndecl, vargs);
3106 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3107 new_temp = make_ssa_name (vec_dest, new_stmt);
3108 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3110 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3113 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3115 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3117 prev_stmt_info = vinfo_for_stmt (new_stmt);
3123 for (j = 0; j < ncopies; ++j)
3125 /* Build argument list for the vectorized call. */
3126 /* FIXME: Rewrite this so that it doesn't
3127 construct a temporary list. */
3130 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3135 = vect_get_vec_def_for_operand (op, stmt, NULL);
3137 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3142 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
3144 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3147 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3148 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
3152 vargs = nreverse (vargs);
3154 rhs = build_function_call_expr (fndecl, vargs);
3155 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3156 new_temp = make_ssa_name (vec_dest, new_stmt);
3157 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3159 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3162 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3164 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3166 prev_stmt_info = vinfo_for_stmt (new_stmt);
3169 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3174 /* No current target implements this case. */
3178 /* The call in STMT might prevent it from being removed in dce.
3179 We however cannot remove it here, due to the way the ssa name
3180 it defines is mapped to the new definition. So just replace
3181 rhs of the statement with something harmless. */
3182 type = TREE_TYPE (scalar_dest);
3183 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
3190 /* Function vect_gen_widened_results_half
3192 Create a vector stmt whose code, type, number of arguments, and result
3193 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
3194 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3195 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3196 needs to be created (DECL is a function-decl of a target-builtin).
3197 STMT is the original scalar stmt that we are vectorizing. */
/* Additional notes:
   - VEC_OPRND1 is only consumed by the two-operand forms (the call built
     with two arguments and the build2 expression); the one-operand forms
     pass VEC_OPRND0 alone.
   - The expression is assigned to a fresh SSA name made from VEC_DEST and
     the stmt is emitted with vect_finish_stmt_generation (STMT, ..., BSI).
   - NOTE(review): this listing has gaps in its embedded line numbering.
     The end of the parameter list, the local declarations, the lines
     joining the paired alternatives and the return statement are not
     visible.  Callers assign the result to their NEW_STMT, so presumably
     the generated stmt is returned -- confirm against the full source.  */
3200 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
3201 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3202 tree vec_dest, block_stmt_iterator *bsi,
3211 /* Generate half of the widened result: */
3212 if (code == CALL_EXPR)
3214 /* Target specific support */
/* The call to DECL takes both operands for binary_op, and VEC_OPRND0
   only in the one-argument form.  */
3215 if (op_type == binary_op)
3216 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
3218 expr = build_call_expr (decl, 1, vec_oprnd0);
3222 /* Generic support */
/* A plain tree code must have exactly OP_TYPE operands; the expression
   is built directly in VECTYPE.  */
3223 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3224 if (op_type == binary_op)
3225 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
3227 expr = build1 (code, vectype, vec_oprnd0);
/* Wrap EXPR in a modify stmt whose lhs is a new SSA name of VEC_DEST,
   then emit it.  */
3229 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3230 new_temp = make_ssa_name (vec_dest, new_stmt);
3231 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3232 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Only for calls: walk the virtual operands of the new stmt and mark
   the underlying symbol of each one for renaming (an SSA_NAME is first
   mapped to its variable with SSA_NAME_VAR).  */
3234 if (code == CALL_EXPR)
3236 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
3238 if (TREE_CODE (sym) == SSA_NAME)
3239 sym = SSA_NAME_VAR (sym);
3240 mark_sym_for_renaming (sym);
3248 /* Check if STMT performs a conversion operation, that can be vectorized.
3249 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3250 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3251 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Additional notes:
   - The rhs code of STMT is tested against FIX_TRUNC_EXPR and FLOAT_EXPR.
   - VECTYPE_IN / VECTYPE_OUT are the vector types obtained from the scalar
     types of the operand and of the lhs; their element counts are compared
     to pick one of the NARROW, NONE and WIDEN values of MODIFIER.
   - SLP_NODE is handed to vect_get_vec_defs, and generated stmts are pushed
     onto SLP_TREE_VEC_STMTS (SLP_NODE) in the first generation loop.
   - A null VEC_STMT means analysis only: the stmt type is recorded as
     type_conversion_vec_info_type.
   - NOTE(review): this listing has gaps in its embedded line numbering.
     The statements guarded by several conditions, the assignments to
     MODIFIER and the lines that select among the three generation loops
     are not visible.  The comments below describe visible lines only.  */
3254 vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
3255 tree *vec_stmt, slp_tree slp_node)
3261 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3262 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3263 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3264 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3265 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3266 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3269 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3270 tree new_stmt = NULL_TREE;
3271 stmt_vec_info prev_stmt_info;
3274 tree vectype_out, vectype_in;
3277 tree rhs_type, lhs_type;
3279 enum { NARROW, NONE, WIDEN } modifier;
3281 VEC(tree,heap) *vec_oprnds0 = NULL;
3284 /* Is STMT a vectorizable conversion? */
3286 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3289 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3292 if (STMT_VINFO_LIVE_P (stmt_info))
3294 /* FORNOW: not yet supported. */
3295 if (vect_print_dump_info (REPORT_DETAILS))
3296 fprintf (vect_dump, "value used after loop.");
/* STMT has to be a GIMPLE_MODIFY_STMT whose lhs (operand 0) is an
   SSA_NAME; its rhs (operand 1) is the conversion expression.  */
3300 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3303 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3306 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3307 code = TREE_CODE (operation);
3308 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3311 /* Check types of lhs and rhs. */
3312 op0 = TREE_OPERAND (operation, 0);
3313 rhs_type = TREE_TYPE (op0);
3314 vectype_in = get_vectype_for_scalar_type (rhs_type);
3315 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3317 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3318 lhs_type = TREE_TYPE (scalar_dest);
3319 vectype_out = get_vectype_for_scalar_type (lhs_type);
3320 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
/* Three-way comparison of the element counts: input has half as many
   elements as output, both equal, or output has half as many as input.  */
3323 if (nunits_in == nunits_out / 2)
3325 else if (nunits_out == nunits_in)
3327 else if (nunits_out == nunits_in / 2)
3332 if (modifier == NONE)
3333 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
3335 /* Bail out if the types are both integral or non-integral. */
3336 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
3337 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
/* NCOPIES is the vectorization factor divided by an element count:
   NUNITS_OUT in the NARROW arm, NUNITS_IN in the other arm.  */
3340 if (modifier == NARROW)
3341 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3343 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3345 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3346 this, so we can safely override NCOPIES with 1 here. */
3350 /* Sanity check: make sure that at least one copy of the vectorized stmt
3351 needs to be generated. */
3352 gcc_assert (ncopies >= 1);
3354 /* FORNOW. This restriction should be relaxed. */
3355 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3357 if (vect_print_dump_info (REPORT_DETAILS))
3358 fprintf (vect_dump, "multiple types in nested loop.");
3362 /* Check the operands of the operation. */
3363 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3365 if (vect_print_dump_info (REPORT_DETAILS))
3366 fprintf (vect_dump, "use not simple.");
3370 /* Supportable by target? */
/* Each MODIFIER value has its own query: the builtin_conversion target
   hook for NONE, supportable_widening_operation for WIDEN and
   supportable_narrowing_operation for NARROW.  */
3371 if ((modifier == NONE
3372 && !targetm.vectorize.builtin_conversion (code, vectype_in))
3373 || (modifier == WIDEN
3374 && !supportable_widening_operation (code, stmt, vectype_in,
3377 || (modifier == NARROW
3378 && !supportable_narrowing_operation (code, stmt, vectype_in,
3381 if (vect_print_dump_info (REPORT_DETAILS))
3382 fprintf (vect_dump, "op not supported by target.");
/* For the widening and narrowing cases the vectype recorded on the stmt
   is switched to the input vector type.  */
3386 if (modifier != NONE)
3388 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3389 /* FORNOW: SLP not supported. */
3390 if (STMT_SLP_TYPE (stmt_info))
3394 if (!vec_stmt) /* transformation not required. */
3396 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3401 if (vect_print_dump_info (REPORT_DETAILS))
3402 fprintf (vect_dump, "transform conversion.");
3405 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3407 if (modifier == NONE && !slp_node)
3408 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3410 prev_stmt_info = NULL;
/* First generation loop: operand defs are collected in VEC_OPRNDS0 and
   each one is converted by a one-argument call to BUILTIN_DECL.  The
   copies are chained through STMT_VINFO_RELATED_STMT.
   NOTE(review): presumably this is the NONE case -- the selecting line
   is not visible here.  */
3414 for (j = 0; j < ncopies; j++)
3420 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
3422 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3425 targetm.vectorize.builtin_conversion (code, vectype_in);
3426 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3428 new_stmt = build_call_expr (builtin_decl, 1, vop0);
3430 /* Arguments are ready. create the new vector stmt. */
3431 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
3432 new_temp = make_ssa_name (vec_dest, new_stmt);
3433 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3434 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3435 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
3436 SSA_OP_ALL_VIRTUALS)
3438 if (TREE_CODE (sym) == SSA_NAME)
3439 sym = SSA_NAME_VAR (sym);
3440 mark_sym_for_renaming (sym);
3443 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3447 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3449 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3450 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Second generation loop: every operand def yields two stmts through
   vect_gen_widened_results_half, one with CODE1/DECL1 and one with
   CODE2/DECL2, both of type VECTYPE_OUT and both linked into the
   RELATED_STMT chain.  */
3455 /* In case the vectorization factor (VF) is bigger than the number
3456 of elements that we can fit in a vectype (nunits), we have to
3457 generate more than one vector stmt - i.e - we need to "unroll"
3458 the vector stmt by a factor VF/nunits. */
3459 for (j = 0; j < ncopies; j++)
3462 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3464 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3466 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3468 /* Generate first half of the widened result: */
3470 = vect_gen_widened_results_half (code1, vectype_out, decl1,
3471 vec_oprnd0, vec_oprnd1,
3472 unary_op, vec_dest, bsi, stmt);
3474 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3476 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3477 prev_stmt_info = vinfo_for_stmt (new_stmt);
3479 /* Generate second half of the widened result: */
3481 = vect_gen_widened_results_half (code2, vectype_out, decl2,
3482 vec_oprnd0, vec_oprnd1,
3483 unary_op, vec_dest, bsi, stmt);
3484 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3485 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Third generation loop: two operand defs (VEC_OPRND0, VEC_OPRND1) are
   combined by a single CODE1 expression of type VECTYPE_OUT per copy.  */
3490 /* In case the vectorization factor (VF) is bigger than the number
3491 of elements that we can fit in a vectype (nunits), we have to
3492 generate more than one vector stmt - i.e - we need to "unroll"
3493 the vector stmt by a factor VF/nunits. */
3494 for (j = 0; j < ncopies; j++)
3499 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3500 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3504 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3505 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3508 /* Arguments are ready. Create the new vector stmt. */
3509 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3510 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3511 new_temp = make_ssa_name (vec_dest, new_stmt);
3512 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3513 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3516 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3518 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3520 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* The stmt recorded in STMT_VINFO_VEC_STMT is what the caller receives
   through VEC_STMT.  */
3523 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3530 /* Function vectorizable_assignment.
3532 Check if STMT performs an assignment (copy) that can be vectorized.
3533 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3534 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3535 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Additional notes:
   - STMT must be a GIMPLE_MODIFY_STMT with an SSA_NAME lhs; its rhs is
     checked with vect_is_simple_use.
   - A null VEC_STMT means analysis only: the stmt type is recorded as
     assignment_vec_info_type and the cost is modelled with
     vect_model_simple_cost.
   - The transformation emits one copy stmt (VEC_DEST = operand) for each
     vector def returned by vect_get_vec_defs, and frees the operand
     vector afterwards.
   - NOTE(review): this listing has gaps in its embedded line numbering.
     The end of the parameter list, the conditions guarding some
     statements (e.g. the early "return false" and the SLP push) and the
     final return are not visible.  */
3538 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3544 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3545 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3546 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3549 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
/* NCOPIES is the vectorization factor divided by the number of elements
   in VECTYPE.  */
3550 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3551 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3553 VEC(tree,heap) *vec_oprnds = NULL;
3556 gcc_assert (ncopies >= 1);
3558 return false; /* FORNOW */
3560 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3563 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3566 /* FORNOW: not yet supported. */
3567 if (STMT_VINFO_LIVE_P (stmt_info))
3569 if (vect_print_dump_info (REPORT_DETAILS))
3570 fprintf (vect_dump, "value used after loop.");
3574 /* Is vectorizable assignment? */
3575 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3578 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3579 if (TREE_CODE (scalar_dest) != SSA_NAME)
/* The copied value is operand 1 of STMT; its def type lands in dt[0].  */
3582 op = GIMPLE_STMT_OPERAND (stmt, 1);
3583 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
3585 if (vect_print_dump_info (REPORT_DETAILS))
3586 fprintf (vect_dump, "use not simple.");
3590 if (!vec_stmt) /* transformation not required. */
3592 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3593 if (vect_print_dump_info (REPORT_DETAILS))
3594 fprintf (vect_dump, "=== vectorizable_assignment ===");
3595 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3600 if (vect_print_dump_info (REPORT_DETAILS))
3601 fprintf (vect_dump, "transform assignment.");
3604 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* Collect the vector defs of OP into VEC_OPRNDS.  */
3607 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3609 /* Arguments are ready. create the new vector stmt. */
3610 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
3612 *vec_stmt = build_gimple_modify_stmt (vec_dest, vop);
3613 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3614 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
3615 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3616 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
3619 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
3622 VEC_free (tree, heap, vec_oprnds);
3627 /* Function vect_min_worthwhile_factor.
3629 For a loop where we could vectorize the operation indicated by CODE,
3630 return the minimum vectorization factor that makes it worthwhile
3631 to use generic vectors. */
/* CODE is the tree code of the operation.  vectorizable_operation compares
   LOOP_VINFO_VECT_FACTOR against the returned value, both when the target
   has no insn for the operation and when the mode of the vector type is
   not a vector mode.
   NOTE(review): the return type and the body of this function are not
   part of this listing, so the per-code thresholds cannot be documented
   here.  */
3633 vect_min_worthwhile_factor (enum tree_code code)
3654 /* Function vectorizable_induction
3656 Check if PHI performs an induction computation that can be vectorized.
3657 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3658 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3659 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Additional notes:
   - BSI is marked ATTRIBUTE_UNUSED.
   - The def type of PHI is asserted to be vect_induction_def.
   - A null VEC_STMT means analysis only: the stmt type is recorded as
     induc_vec_info_type and the cost is modelled with
     vect_model_induction_cost.
   - The transformation takes the vector def from
     get_initial_def_for_induction (PHI) and stores that def's defining
     stmt in *VEC_STMT.
   - NOTE(review): this listing has gaps in its embedded line numbering.
     The end of the parameter list, the statements guarded by the
     conditions below and the final return are not visible.  */
3662 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3665 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3666 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3667 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
/* NCOPIES is the vectorization factor divided by the number of elements
   in VECTYPE.  */
3668 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3669 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3672 gcc_assert (ncopies >= 1);
3674 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3677 /* FORNOW: SLP not supported. */
3678 if (STMT_SLP_TYPE (stmt_info))
3681 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3683 if (STMT_VINFO_LIVE_P (stmt_info))
3685 /* FORNOW: not yet supported. */
3686 if (vect_print_dump_info (REPORT_DETAILS))
3687 fprintf (vect_dump, "value used after loop.");
3691 if (TREE_CODE (phi) != PHI_NODE)
3694 if (!vec_stmt) /* transformation not required. */
3696 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3697 if (vect_print_dump_info (REPORT_DETAILS))
3698 fprintf (vect_dump, "=== vectorizable_induction ===");
3699 vect_model_induction_cost (stmt_info, ncopies);
3705 if (vect_print_dump_info (REPORT_DETAILS))
3706 fprintf (vect_dump, "transform induction phi.");
/* The vectorized induction is represented by the stmt that defines the
   SSA name returned for PHI.  */
3708 vec_def = get_initial_def_for_induction (phi);
3709 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3714 /* Function vectorizable_operation.
3716 Check if STMT performs a binary or unary operation that can be vectorized.
3717 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3718 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3719 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Additional notes:
   - The operand count comes from TREE_OPERAND_LENGTH and is tested
     against unary_op and binary_op; each operand is checked with
     vect_is_simple_use.
   - Target support is queried through optab_for_tree_code and
     optab_handler for the machine mode of the stmt's vector type.
   - For LSHIFT_EXPR/RSHIFT_EXPR the mode of insn operand 2 is inspected:
     unless it is a vector mode, the shift amount has to be a constant or
     loop-invariant def.
   - A null VEC_STMT means analysis only: the stmt type is recorded as
     op_vec_info_type and the cost is modelled with vect_model_simple_cost.
   - NOTE(review): this listing has gaps in its embedded line numbering.
     The end of the parameter list, several declarations, the statements
     guarded by many of the conditions and the final return are not
     visible.  */
3722 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3728 tree op0, op1 = NULL;
3729 tree vec_oprnd1 = NULL_TREE;
3730 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3731 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3732 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3733 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3734 enum tree_code code;
3735 enum machine_mode vec_mode;
3740 enum machine_mode optab_op2_mode;
3742 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3743 tree new_stmt = NULL_TREE;
3744 stmt_vec_info prev_stmt_info;
/* NCOPIES is the vectorization factor divided by the number of elements
   in VECTYPE.  */
3745 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3748 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3750 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3753 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3754 this, so we can safely override NCOPIES with 1 here. */
3757 gcc_assert (ncopies >= 1);
3758 /* FORNOW. This restriction should be relaxed. */
3759 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3761 if (vect_print_dump_info (REPORT_DETAILS))
3762 fprintf (vect_dump, "multiple types in nested loop.");
3766 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3769 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3772 /* FORNOW: not yet supported. */
3773 if (STMT_VINFO_LIVE_P (stmt_info))
3775 if (vect_print_dump_info (REPORT_DETAILS))
3776 fprintf (vect_dump, "value used after loop.");
3780 /* Is STMT a vectorizable binary/unary operation? */
3781 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3784 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
/* The vector type of the result is compared, by element count, with the
   vector type recorded on the stmt.  */
3787 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3788 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3789 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3790 if (nunits_out != nunits_in)
3793 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3794 code = TREE_CODE (operation);
3796 /* For pointer addition, we should use the normal plus for
3797 the vector addition. */
3798 if (code == POINTER_PLUS_EXPR)
3801 optab = optab_for_tree_code (code, vectype);
3803 /* Support only unary or binary operations. */
3804 op_type = TREE_OPERAND_LENGTH (operation);
3805 if (op_type != unary_op && op_type != binary_op)
3807 if (vect_print_dump_info (REPORT_DETAILS))
3808 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
/* Operand 0 always exists; its def type is stored in dt[0].  */
3812 op0 = TREE_OPERAND (operation, 0);
3813 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3815 if (vect_print_dump_info (REPORT_DETAILS))
3816 fprintf (vect_dump, "use not simple.");
/* Operand 1 is only examined for binary operations; its def type is
   stored in dt[1].  */
3820 if (op_type == binary_op)
3822 op1 = TREE_OPERAND (operation, 1);
3823 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3825 if (vect_print_dump_info (REPORT_DETAILS))
3826 fprintf (vect_dump, "use not simple.");
3831 /* Supportable by target? */
3834 if (vect_print_dump_info (REPORT_DETAILS))
3835 fprintf (vect_dump, "no optab.");
/* Look up the insn that OPTAB provides for the machine mode of VECTYPE.  */
3838 vec_mode = TYPE_MODE (vectype);
3839 icode = (int) optab_handler (optab, vec_mode)->insn_code;
3840 if (icode == CODE_FOR_nothing)
3842 if (vect_print_dump_info (REPORT_DETAILS))
3843 fprintf (vect_dump, "op not supported by target.");
/* With no insn available, test whether the vector mode is exactly
   UNITS_PER_WORD bytes wide and whether the vectorization factor reaches
   vect_min_worthwhile_factor (CODE).  */
3844 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3845 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3846 < vect_min_worthwhile_factor (code))
3848 if (vect_print_dump_info (REPORT_DETAILS))
3849 fprintf (vect_dump, "proceeding using word mode.");
3852 /* Worthwhile without SIMD support? */
3853 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3854 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3855 < vect_min_worthwhile_factor (code))
3857 if (vect_print_dump_info (REPORT_DETAILS))
3858 fprintf (vect_dump, "not worthwhile without SIMD support.");
3862 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3864 /* FORNOW: not yet supported. */
3865 if (!VECTOR_MODE_P (vec_mode))
3868 /* Invariant argument is needed for a vector shift
3869 by a scalar shift operand. */
3870 optab_op2_mode = insn_data[icode].operand[2].mode;
3871 if (! (VECTOR_MODE_P (optab_op2_mode)
3872 || dt[1] == vect_constant_def
3873 || dt[1] == vect_invariant_def))
3875 if (vect_print_dump_info (REPORT_DETAILS))
3876 fprintf (vect_dump, "operand mode requires invariant argument.");
3881 if (!vec_stmt) /* transformation not required. */
3883 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3884 if (vect_print_dump_info (REPORT_DETAILS))
3885 fprintf (vect_dump, "=== vectorizable_operation ===");
3886 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3892 if (vect_print_dump_info (REPORT_DETAILS))
3893 fprintf (vect_dump, "transform binary/unary operation.");
3896 vec_dest = vect_create_destination_var (scalar_dest, vectype);
/* The operand vectors are allocated with room for one element; the
   second one only for binary operations.  */
3900 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3901 if (op_type == binary_op)
3902 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3905 /* In case the vectorization factor (VF) is bigger than the number
3906 of elements that we can fit in a vectype (nunits), we have to generate
3907 more than one vector stmt - i.e - we need to "unroll" the
3908 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3909 from one copy of the vector stmt to the next, in the field
3910 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3911 stages to find the correct vector defs to be used when vectorizing
3912 stmts that use the defs of the current stmt. The example below illustrates
3913 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3914 4 vectorized stmts):
3916 before vectorization:
3917 RELATED_STMT VEC_STMT
3921 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3923 RELATED_STMT VEC_STMT
3924 VS1_0: vx0 = memref0 VS1_1 -
3925 VS1_1: vx1 = memref1 VS1_2 -
3926 VS1_2: vx2 = memref2 VS1_3 -
3927 VS1_3: vx3 = memref3 - -
3928 S1: x = load - VS1_0
3931 step2: vectorize stmt S2 (done here):
3932 To vectorize stmt S2 we first need to find the relevant vector
3933 def for the first operand 'x'. This is, as usual, obtained from
3934 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3935 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3936 relevant vector def 'vx0'. Having found 'vx0' we can generate
3937 the vector stmt VS2_0, and as usual, record it in the
3938 STMT_VINFO_VEC_STMT of stmt S2.
3939 When creating the second copy (VS2_1), we obtain the relevant vector
3940 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3941 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3942 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3943 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3944 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3945 chain of stmts and pointers:
3946 RELATED_STMT VEC_STMT
3947 VS1_0: vx0 = memref0 VS1_1 -
3948 VS1_1: vx1 = memref1 VS1_2 -
3949 VS1_2: vx2 = memref2 VS1_3 -
3950 VS1_3: vx3 = memref3 - -
3951 S1: x = load - VS1_0
3952 VS2_0: vz0 = vx0 + v1 VS2_1 -
3953 VS2_1: vz1 = vx1 + v1 VS2_2 -
3954 VS2_2: vz2 = vx2 + v1 VS2_3 -
3955 VS2_3: vz3 = vx3 + v1 - -
3956 S2: z = x + 1 - VS2_0 */
3958 prev_stmt_info = NULL;
3959 for (j = 0; j < ncopies; j++)
/* NOTE(review): in this listing the comment that opens three lines below
   has lost its closing line, so as listed it runs on to the terminator of
   the "Arguments are ready" comment and swallows the operand-setup
   statements in between.  Restore the missing line from the full source
   before relying on this region.  */
3964 if (op_type == binary_op
3965 && (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3968 /* Vector shl and shr insn patterns can be defined with scalar
3969 operand 2 (shift operand). In this case, use constant or loop
3970 invariant op1 directly, without extending it to vector mode
3972 optab_op2_mode = insn_data[icode].operand[2].mode;
3973 if (!VECTOR_MODE_P (optab_op2_mode))
3975 if (vect_print_dump_info (REPORT_DETAILS))
3976 fprintf (vect_dump, "operand 1 using scalar mode.");
3978 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
3982 if (op_type == binary_op && !vec_oprnd1)
3983 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
3986 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, &vec_oprnds1,
3990 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
3992 /* Arguments are ready. Create the new vector stmt. */
/* One new stmt per entry of VEC_OPRNDS0: a build2 with the entry of
   VEC_OPRNDS1 at the same index for binary operations, and a build1 on
   VOP0 in the one-operand form.  */
3993 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3995 if (op_type == binary_op)
3997 vop1 = VEC_index (tree, vec_oprnds1, i);
3998 new_stmt = build_gimple_modify_stmt (vec_dest,
3999 build2 (code, vectype, vop0, vop1));
4002 new_stmt = build_gimple_modify_stmt (vec_dest,
4003 build1 (code, vectype, vop0));
4005 new_temp = make_ssa_name (vec_dest, new_stmt);
4006 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4007 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4009 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4013 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4015 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4016 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* Release the operand vectors.  */
4019 VEC_free (tree, heap, vec_oprnds0);
4021 VEC_free (tree, heap, vec_oprnds1);
4027 /* Function vectorizable_type_demotion
4029 Check if STMT performs a binary or unary operation that involves
4030 type demotion, and if it can be vectorized.
4031 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4032 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4033 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* Additional notes:
   - The rhs code of STMT is tested against NOP_EXPR and CONVERT_EXPR.
   - VECTYPE_IN / VECTYPE_OUT are the vector types obtained from the scalar
     types of the operand and of the lhs; the element count of VECTYPE_IN
     is compared with half the element count of VECTYPE_OUT.
   - supportable_narrowing_operation supplies CODE1, the tree code of the
     stmts that get generated.
   - A null VEC_STMT means analysis only: the stmt type is recorded as
     type_demotion_vec_info_type and the cost is modelled with
     vect_model_simple_cost.
   - NOTE(review): this listing has gaps in its embedded line numbering.
     The end of the parameter list, several declarations, the statements
     guarded by many of the conditions and the final return are not
     visible.  */
4036 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
4043 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4044 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4045 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4046 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4047 enum tree_code code, code1 = ERROR_MARK;
4050 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4052 stmt_vec_info prev_stmt_info;
4061 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4064 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4067 /* FORNOW: not yet supported. */
4068 if (STMT_VINFO_LIVE_P (stmt_info))
4070 if (vect_print_dump_info (REPORT_DETAILS))
4071 fprintf (vect_dump, "value used after loop.");
4075 /* Is STMT a vectorizable type-demotion operation? */
4076 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4079 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4082 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4083 code = TREE_CODE (operation);
4084 if (code != NOP_EXPR && code != CONVERT_EXPR)
/* Derive the input and output vector types and their element counts.  */
4087 op0 = TREE_OPERAND (operation, 0);
4088 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4089 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4091 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4092 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4093 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4094 if (nunits_in != nunits_out / 2) /* FORNOW */
/* NCOPIES is the vectorization factor divided by the element count of
   the output vector type.  */
4097 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4098 gcc_assert (ncopies >= 1);
4099 /* FORNOW. This restriction should be relaxed. */
4100 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4102 if (vect_print_dump_info (REPORT_DETAILS))
4103 fprintf (vect_dump, "multiple types in nested loop.");
/* Type test: both scalar types integral, or both scalar floating-point
   with CODE being NOP_EXPR or CONVERT_EXPR.  */
4107 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4108 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4109 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4110 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4111 && (code == NOP_EXPR || code == CONVERT_EXPR))))
4114 /* Check the operands of the operation. */
4115 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4117 if (vect_print_dump_info (REPORT_DETAILS))
4118 fprintf (vect_dump, "use not simple.");
4122 /* Supportable by target? */
4123 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
/* The vectype recorded on the stmt becomes the input vector type.  */
4126 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4128 if (!vec_stmt) /* transformation not required. */
4130 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4131 if (vect_print_dump_info (REPORT_DETAILS))
4132 fprintf (vect_dump, "=== vectorizable_demotion ===");
4133 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
4138 if (vect_print_dump_info (REPORT_DETAILS))
4139 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
4143 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
/* Each copy combines two operand vector defs (VEC_OPRND0, VEC_OPRND1)
   in a single CODE1 expression of type VECTYPE_OUT; the copies are
   chained through STMT_VINFO_RELATED_STMT.  */
4145 /* In case the vectorization factor (VF) is bigger than the number
4146 of elements that we can fit in a vectype (nunits), we have to generate
4147 more than one vector stmt - i.e - we need to "unroll" the
4148 vector stmt by a factor VF/nunits. */
4149 prev_stmt_info = NULL;
4150 for (j = 0; j < ncopies; j++)
4155 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4156 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4160 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
4161 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4164 /* Arguments are ready. Create the new vector stmt. */
4165 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
4166 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
4167 new_temp = make_ssa_name (vec_dest, new_stmt);
4168 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4169 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4172 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4174 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4176 prev_stmt_info = vinfo_for_stmt (new_stmt);
/* The stmt recorded in STMT_VINFO_VEC_STMT is what the caller receives
   through VEC_STMT.  */
4179 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4184 /* Function vectorizable_type_promotion
4186 Check if STMT performs a binary or unary operation that involves
4187 type promotion, and if it can be vectorized.
4188 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4189 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4190 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4193 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
4199 tree op0, op1 = NULL;
4200 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4201 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4202 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4203 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4204 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4205 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4208 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4210 stmt_vec_info prev_stmt_info;
4218 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4221 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4224 /* FORNOW: not yet supported. */
4225 if (STMT_VINFO_LIVE_P (stmt_info))
4227 if (vect_print_dump_info (REPORT_DETAILS))
4228 fprintf (vect_dump, "value used after loop.");
4232 /* Is STMT a vectorizable type-promotion operation? */
4233 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4236 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4239 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4240 code = TREE_CODE (operation);
4241 if (code != NOP_EXPR && code != CONVERT_EXPR
4242 && code != WIDEN_MULT_EXPR)
4245 op0 = TREE_OPERAND (operation, 0);
4246 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4247 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4249 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4250 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4251 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4252 if (nunits_out != nunits_in / 2) /* FORNOW */
4255 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4256 gcc_assert (ncopies >= 1);
4257 /* FORNOW. This restriction should be relaxed. */
4258 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4260 if (vect_print_dump_info (REPORT_DETAILS))
4261 fprintf (vect_dump, "multiple types in nested loop.");
4265 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4266 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4267 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4268 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4269 && (code == CONVERT_EXPR || code == NOP_EXPR))))
4272 /* Check the operands of the operation. */
4273 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4275 if (vect_print_dump_info (REPORT_DETAILS))
4276 fprintf (vect_dump, "use not simple.");
4280 op_type = TREE_CODE_LENGTH (code);
4281 if (op_type == binary_op)
4283 op1 = TREE_OPERAND (operation, 1);
4284 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
4286 if (vect_print_dump_info (REPORT_DETAILS))
4287 fprintf (vect_dump, "use not simple.");
4292 /* Supportable by target? */
4293 if (!supportable_widening_operation (code, stmt, vectype_in,
4294 &decl1, &decl2, &code1, &code2))
4297 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4299 if (!vec_stmt) /* transformation not required. */
4301 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4302 if (vect_print_dump_info (REPORT_DETAILS))
4303 fprintf (vect_dump, "=== vectorizable_promotion ===");
4304 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
4310 if (vect_print_dump_info (REPORT_DETAILS))
4311 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
4315 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4317 /* In case the vectorization factor (VF) is bigger than the number
4318 of elements that we can fit in a vectype (nunits), we have to generate
4319 more than one vector stmt - i.e - we need to "unroll" the
4320 vector stmt by a factor VF/nunits. */
4322 prev_stmt_info = NULL;
4323 for (j = 0; j < ncopies; j++)
4328 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4329 if (op_type == binary_op)
4330 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
4334 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4335 if (op_type == binary_op)
4336 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
4339 /* Arguments are ready. Create the new vector stmt. We are creating
4340 two vector defs because the widened result does not fit in one vector.
4341 The vectorized stmt can be expressed as a call to a target builtin,
4342 or using a tree-code. */
4343 /* Generate first half of the widened result: */
4344 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
4345 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4347 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4349 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4350 prev_stmt_info = vinfo_for_stmt (new_stmt);
4352 /* Generate second half of the widened result: */
4353 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
4354 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4355 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4356 prev_stmt_info = vinfo_for_stmt (new_stmt);
4360 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4365 /* Function vect_strided_store_supported.
4367 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
4368 and FALSE otherwise. */
/* VECTYPE is the vector type to test; only its machine mode (TYPE_MODE) is
   used when querying the optab handlers.  Each failed check writes a
   diagnostic to VECT_DUMP when REPORT_DETAILS dumping is enabled.  */
4371 vect_strided_store_supported (tree vectype)
4373 optab interleave_high_optab, interleave_low_optab;
4376 mode = (int) TYPE_MODE (vectype);
4378 /* Check that the operation is supported. */
4379 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
4381 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
/* First check: an optab must exist for each of the two interleave tree
   codes.  */
4383 if (!interleave_high_optab || !interleave_low_optab)
4385 if (vect_print_dump_info (REPORT_DETAILS))
4386 fprintf (vect_dump, "no optab for interleave.");
/* Second check: the handler of each optab for MODE must not be
   CODE_FOR_nothing; both the high and the low variant are tested.  */
4390 if (optab_handler (interleave_high_optab, mode)->insn_code
4392 || optab_handler (interleave_low_optab, mode)->insn_code
4393 == CODE_FOR_nothing)
4395 if (vect_print_dump_info (REPORT_DETAILS))
4396 fprintf (vect_dump, "interleave op not supported by target.");
4404 /* Function vect_permute_store_chain.
4406 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4407 a power of 2, generate interleave_high/low stmts to reorder the data
4408 correctly for the stores. Return the final references for stores in
4411 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4412 The input is 4 vectors each containing 8 elements. We assign a number to each
4413 element, the input sequence is:
4415 1st vec: 0 1 2 3 4 5 6 7
4416 2nd vec: 8 9 10 11 12 13 14 15
4417 3rd vec: 16 17 18 19 20 21 22 23
4418 4th vec: 24 25 26 27 28 29 30 31
4420 The output sequence should be:
4422 1st vec: 0 8 16 24 1 9 17 25
4423 2nd vec: 2 10 18 26 3 11 19 27
4424 3rd vec: 4 12 20 28 5 13 21 29
4425 4th vec: 6 14 22 30 7 15 23 31
4427 i.e., we interleave the contents of the four vectors in their order.
4429 We use interleave_high/low instructions to create such output. The input of
4430 each interleave_high/low operation is two vectors:
4433 the even elements of the result vector are obtained left-to-right from the
4434 high/low elements of the first vector. The odd elements of the result are
4435 obtained left-to-right from the high/low elements of the second vector.
4436 The output of interleave_high will be: 0 4 1 5
4437 and of interleave_low: 2 6 3 7
4440 The permutation is done in log LENGTH stages. In each stage interleave_high
4441 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4442 where the first argument is taken from the first half of DR_CHAIN and the
4443 second argument from its second half.
4446 I1: interleave_high (1st vec, 3rd vec)
4447 I2: interleave_low (1st vec, 3rd vec)
4448 I3: interleave_high (2nd vec, 4th vec)
4449 I4: interleave_low (2nd vec, 4th vec)
4451 The output for the first stage is:
4453 I1: 0 16 1 17 2 18 3 19
4454 I2: 4 20 5 21 6 22 7 23
4455 I3: 8 24 9 25 10 26 11 27
4456 I4: 12 28 13 29 14 30 15 31
4458 The output of the second stage, i.e. the final result is:
4460 I1: 0 8 16 24 1 9 17 25
4461 I2: 2 10 18 26 3 11 19 27
4462 I3: 4 12 20 28 5 13 21 29
4463 I4: 6 14 22 30 7 15 23 31. */
/* DR_CHAIN holds the LENGTH input vector defs and *RESULT_CHAIN receives
   the permuted defs.  STMT supplies the vector type (through its
   stmt_vec_info) and, together with BSI, is handed to
   vect_finish_stmt_generation for every interleave stmt created here.  */
4466 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4467 unsigned int length,
4469 block_stmt_iterator *bsi,
4470 VEC(tree,heap) **result_chain)
4472 tree perm_dest, perm_stmt, vect1, vect2, high, low;
4473 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4474 tree scalar_dest, tmp;
4477 VEC(tree,heap) *first, *second;
4479 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
/* NOTE(review): FIRST and SECOND are heap-allocated here, but no later use
   or release of either is visible in this function - confirm whether they
   are needed at all.  */
4480 first = VEC_alloc (tree, heap, length/2);
4481 second = VEC_alloc (tree, heap, length/2);
4483 /* Check that the operation is supported. */
4484 if (!vect_strided_store_supported (vectype))
/* Start *RESULT_CHAIN as a copy of DR_CHAIN; its elements are overwritten
   in place by the VEC_replace calls below.  */
4487 *result_chain = VEC_copy (tree, heap, dr_chain);
/* exact_log2 (LENGTH) stages.  In each stage element J of DR_CHAIN is
   paired with element J + LENGTH/2.  */
4489 for (i = 0; i < exact_log2 (length); i++)
4491 for (j = 0; j < length/2; j++)
4493 vect1 = VEC_index (tree, dr_chain, j);
4494 vect2 = VEC_index (tree, dr_chain, j+length/2);
4496 /* Create interleaving stmt:
4497 in the case of big endian:
4498 high = interleave_high (vect1, vect2)
4499 and in the case of little endian:
4500 high = interleave_low (vect1, vect2). */
4501 perm_dest = create_tmp_var (vectype, "vect_inter_high");
4502 DECL_GIMPLE_REG_P (perm_dest) = 1;
4503 add_referenced_var (perm_dest);
4504 if (BYTES_BIG_ENDIAN)
4505 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4507 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4508 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4509 high = make_ssa_name (perm_dest, perm_stmt);
4510 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
4511 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
/* HIGH is stored in the even slot 2*J of the result.  */
4512 VEC_replace (tree, *result_chain, 2*j, high);
4514 /* Create interleaving stmt:
4515 in the case of big endian:
4516 low = interleave_low (vect1, vect2)
4517 and in the case of little endian:
4518 low = interleave_high (vect1, vect2). */
4519 perm_dest = create_tmp_var (vectype, "vect_inter_low");
4520 DECL_GIMPLE_REG_P (perm_dest) = 1;
4521 add_referenced_var (perm_dest);
4522 if (BYTES_BIG_ENDIAN)
4523 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4525 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4526 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4527 low = make_ssa_name (perm_dest, perm_stmt);
4528 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
4529 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
/* LOW is stored in the odd slot 2*J+1 of the result.  */
4530 VEC_replace (tree, *result_chain, 2*j+1, low);
/* The output of this stage becomes the input of the next one.
   NOTE(review): this makes a fresh heap copy every time it runs and no
   release of the previous DR_CHAIN copy is visible here - confirm.  */
4532 dr_chain = VEC_copy (tree, heap, *result_chain);
4538 /* Function vectorizable_store.
4540 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4542 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4543 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4544 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
/* STMT is the scalar store under consideration and BSI the insertion point
   passed to the stmt-generation helpers.  VEC_STMT doubles as the mode
   switch: when it is NULL only the analysis part runs (the stmt is tagged
   store_vec_info_type and its cost is modelled for non-pure-SLP stmts).
   SLP_NODE, when non-NULL, selects the SLP paths (see the SLP local).  */
4547 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
4553 tree vec_oprnd = NULL_TREE;
4554 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4555 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4556 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4557 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4558 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4559 enum machine_mode vec_mode;
4561 enum dr_alignment_support alignment_support_scheme;
4563 enum vect_def_type dt;
4564 stmt_vec_info prev_stmt_info = NULL;
4565 tree dataref_ptr = NULL_TREE;
4566 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4567 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4569 tree next_stmt, first_stmt;
4570 bool strided_store = false;
4571 unsigned int group_size, i;
4572 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
4574 VEC(tree,heap) *vec_oprnds = NULL;
4575 bool slp = (slp_node != NULL);
4576 stmt_vec_info first_stmt_vinfo;
4577 unsigned int vec_num;
4579 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
4580 this, so we can safely override NCOPIES with 1 here. */
4584 gcc_assert (ncopies >= 1);
4586 /* FORNOW. This restriction should be relaxed. */
4587 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4589 if (vect_print_dump_info (REPORT_DETAILS))
4590 fprintf (vect_dump, "multiple types in nested loop.");
4594 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4597 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4600 if (STMT_VINFO_LIVE_P (stmt_info))
4602 if (vect_print_dump_info (REPORT_DETAILS))
4603 fprintf (vect_dump, "value used after loop.");
4607 /* Is vectorizable store? */
4609 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4612 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4613 if (TREE_CODE (scalar_dest) != ARRAY_REF
4614 && TREE_CODE (scalar_dest) != INDIRECT_REF
4615 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
4618 op = GIMPLE_STMT_OPERAND (stmt, 1);
4619 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4621 if (vect_print_dump_info (REPORT_DETAILS))
4622 fprintf (vect_dump, "use not simple.");
4626 vec_mode = TYPE_MODE (vectype);
4627 /* FORNOW. In some cases can vectorize even if data-type not supported
4628 (e.g. - array initialization with 0). */
4629 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
4632 if (!STMT_VINFO_DATA_REF (stmt_info))
4635 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4637 strided_store = true;
4638 if (!vect_strided_store_supported (vectype)
4639 && !PURE_SLP_STMT (stmt_info) && !slp)
4643 if (!vec_stmt) /* transformation not required. */
4645 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4646 if (!PURE_SLP_STMT (stmt_info))
4647 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
4655 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4656 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4657 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
/* Count this store as seen.  The counter lives on the first stmt of the
   group and is compared with DR_GROUP_SIZE below.  */
4659 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4662 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
4664 /* We vectorize all the stmts of the interleaving group when we
4665 reach the last stmt in the group. */
4666 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4667 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
4670 *vec_stmt = NULL_TREE;
4675 strided_store = false;
4677 /* VEC_NUM is the number of vect stmts to be created for this group. */
4678 if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
4679 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4681 vec_num = group_size;
4687 group_size = vec_num = 1;
4688 first_stmt_vinfo = stmt_info;
4691 if (vect_print_dump_info (REPORT_DETAILS))
4692 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
4694 dr_chain = VEC_alloc (tree, heap, group_size);
4695 oprnds = VEC_alloc (tree, heap, group_size);
4697 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4698 gcc_assert (alignment_support_scheme);
4699 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
4701 /* In case the vectorization factor (VF) is bigger than the number
4702 of elements that we can fit in a vectype (nunits), we have to generate
4703 more than one vector stmt - i.e - we need to "unroll" the
4704 vector stmt by a factor VF/nunits. For more details see documentation in
4705 vect_get_vec_def_for_stmt_copy. */
4707 /* In case of interleaving (non-unit strided access):
4714 We create vectorized stores starting from base address (the access of the
4715 first stmt in the chain (S2 in the above example), when the last store stmt
4716 of the chain (S4) is reached:
4719 VS2: &base + vec_size*1 = vx0
4720 VS3: &base + vec_size*2 = vx1
4721 VS4: &base + vec_size*3 = vx3
4723 Then permutation statements are generated:
4725 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
4726 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
4729 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4730 (the order of the data-refs in the output of vect_permute_store_chain
4731 corresponds to the order of scalar stmts in the interleaving chain - see
4732 the documentation of vect_permute_store_chain()).
4734 In case of both multiple types and interleaving, above vector stores and
4735 permutation stmts are created for every copy. The result vector stmts are
4736 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4737 STMT_VINFO_RELATED_STMT for the next copies.
4740 prev_stmt_info = NULL;
4741 for (j = 0; j < ncopies; j++)
4750 /* Get vectorized arguments for SLP_NODE. */
4751 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
4753 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4757 /* For interleaved stores we collect vectorized defs for all the
4758 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4759 used as an input to vect_permute_store_chain(), and OPRNDS as
4760 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4762 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4763 OPRNDS are of size 1. */
4764 next_stmt = first_stmt;
4765 for (i = 0; i < group_size; i++)
4767 /* Since gaps are not supported for interleaved stores,
4768 GROUP_SIZE is the exact number of stmts in the chain.
4769 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4770 there is no interleaving, GROUP_SIZE is 1, and only one
4771 iteration of the loop will be executed. */
4772 gcc_assert (next_stmt);
4773 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4775 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4777 VEC_quick_push(tree, dr_chain, vec_oprnd);
4778 VEC_quick_push(tree, oprnds, vec_oprnd);
4779 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
/* Create the vector pointer from the first stmt of the group.  PTR_INCR is
   what the later bump_vector_ptr calls use to advance it, and the assert
   requires that the pointer was not reported as invariant (INV_P).  */
4782 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
4783 &dummy, &ptr_incr, false,
4784 TREE_TYPE (vec_oprnd), &inv_p);
4785 gcc_assert (!inv_p);
4789 /* FORNOW SLP doesn't work for multiple types. */
4792 /* For interleaved stores we created vectorized defs for all the
4793 defs stored in OPRNDS in the previous iteration (previous copy).
4794 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4795 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4797 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4798 OPRNDS are of size 1. */
4799 for (i = 0; i < group_size; i++)
4801 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt,
4802 VEC_index (tree, oprnds, i));
4803 VEC_replace(tree, dr_chain, i, vec_oprnd);
4804 VEC_replace(tree, oprnds, i, vec_oprnd);
4807 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
4812 result_chain = VEC_alloc (tree, heap, group_size);
4814 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4819 next_stmt = first_stmt;
4820 for (i = 0; i < vec_num; i++)
4823 /* Bump the vector pointer. */
4824 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
4828 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4829 else if (strided_store)
4830 /* For strided stores vectorized defs are interleaved in
4831 vect_permute_store_chain(). */
4832 vec_oprnd = VEC_index (tree, result_chain, i);
4834 data_ref = build_fold_indirect_ref (dataref_ptr);
4835 /* Arguments are ready. Create the new vector stmt. */
4836 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4837 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4838 mark_symbols_for_renaming (new_stmt);
/* Bookkeeping for the new store: it is recorded in STMT_VINFO_VEC_STMT and
   *VEC_STMT, or linked from the previously generated stmt through
   STMT_VINFO_RELATED_STMT; PREV_STMT_INFO then advances to it.
   NOTE(review): the conditions selecting between the two assignments are
   not visible in this text - confirm against the full source.  */
4841 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4843 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4845 prev_stmt_info = vinfo_for_stmt (new_stmt);
4846 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4856 /* Function vect_setup_realignment
4858 This function is called when vectorizing an unaligned load using
4859 the dr_explicit_realign[_optimized] scheme.
4860 This function generates the following code at the loop prolog:
4863 x msq_init = *(floor(p)); # prolog load
4864 realignment_token = call target_builtin;
4866 x msq = phi (msq_init, ---)
4868 The stmts marked with x are generated only for the case of
4869 dr_explicit_realign_optimized.
4871 The code above sets up a new (vector) pointer, pointing to the first
4872 location accessed by STMT, and a "floor-aligned" load using that pointer.
4873 It also generates code to compute the "realignment-token" (if the relevant
4874 target hook was defined), and creates a phi-node at the loop-header bb
4875 whose arguments are the result of the prolog-load (created by this
4876 function) and the result of a load that takes place in the loop (to be
4877 created by the caller to this function).
4879 For the case of dr_explicit_realign_optimized:
4880 The caller to this function uses the phi-result (msq) to create the
4881 realignment code inside the loop, and sets up the missing phi argument,
4884 msq = phi (msq_init, lsq)
4885 lsq = *(floor(p')); # load in loop
4886 result = realign_load (msq, lsq, realignment_token);
4888 For the case of dr_explicit_realign:
4890 msq = *(floor(p)); # load in loop
4892 lsq = *(floor(p')); # load in loop
4893 result = realign_load (msq, lsq, realignment_token);
4896 STMT - (scalar) load stmt to be vectorized. This load accesses
4897 a memory location that may be unaligned.
4898 BSI - place where new code is to be inserted.
4899 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4903 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4904 target hook, if defined.
4905 Return value - the result of the loop-header phi node. */
/* Two results are passed back through pointers: *REALIGNMENT_TOKEN
   receives the lhs of the mask-for-load builtin call (only when the target
   defines that hook), and *AT_LOOP receives LOOP_FOR_INITIAL_LOAD, the loop
   whose preheader is used for the prolog load in the optimized scheme.  */
4908 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4909 tree *realignment_token,
4910 enum dr_alignment_support alignment_support_scheme,
4912 struct loop **at_loop)
4914 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4915 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4916 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4917 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4919 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4926 tree msq_init = NULL_TREE;
4929 tree msq = NULL_TREE;
4930 tree stmts = NULL_TREE;
4932 bool compute_in_loop = false;
4933 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4934 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
4935 struct loop *loop_for_initial_load;
4937 gcc_assert (alignment_support_scheme == dr_explicit_realign
4938 || alignment_support_scheme == dr_explicit_realign_optimized);
4940 /* We need to generate three things:
4941 1. the misalignment computation
4942 2. the extra vector load (for the optimized realignment scheme).
4943 3. the phi node for the two vectors from which the realignment is
4944 done (for the optimized realignment scheme).
4947 /* 1. Determine where to generate the misalignment computation.
4949 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4950 calculation will be generated by this function, outside the loop (in the
4951 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4952 caller, inside the loop.
4954 Background: If the misalignment remains fixed throughout the iterations of
4955 the loop, then both realignment schemes are applicable, and also the
4956 misalignment computation can be done outside LOOP. This is because we are
4957 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4958 are a multiple of VS (the Vector Size), and therefore the misalignment in
4959 different vectorized LOOP iterations is always the same.
4960 The problem arises only if the memory access is in an inner-loop nested
4961 inside LOOP, which is now being vectorized using outer-loop vectorization.
4962 This is the only case when the misalignment of the memory access may not
4963 remain fixed throughout the iterations of the inner-loop (as explained in
4964 detail in vect_supportable_dr_alignment). In this case, not only is the
4965 optimized realignment scheme not applicable, but also the misalignment
4966 computation (and generation of the realignment token that is passed to
4967 REALIGN_LOAD) have to be done inside the loop.
4969 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4970 or not, which in turn determines if the misalignment is computed inside
4971 the inner-loop, or outside LOOP. */
4973 if (init_addr != NULL_TREE)
4975 compute_in_loop = true;
4976 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4980 /* 2. Determine where to generate the extra vector load.
4982 For the optimized realignment scheme, instead of generating two vector
4983 loads in each iteration, we generate a single extra vector load in the
4984 preheader of the loop, and in each iteration reuse the result of the
4985 vector load from the previous iteration. In case the memory access is in
4986 an inner-loop nested inside LOOP, which is now being vectorized using
4987 outer-loop vectorization, we need to determine whether this initial vector
4988 load should be generated at the preheader of the inner-loop, or can be
4989 generated at the preheader of LOOP. If the memory access has no evolution
4990 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4991 to be generated inside LOOP (in the preheader of the inner-loop). */
4993 if (nested_in_vect_loop)
/* "No evolution in LOOP" is tested as the data-ref step recorded on
   STMT_INFO comparing equal to zero.  */
4995 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4996 bool invariant_in_outerloop =
4997 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4998 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5001 loop_for_initial_load = loop;
5003 *at_loop = loop_for_initial_load;
5005 /* 3. For the case of the optimized realignment, create the first vector
5006 load at the loop preheader. */
5008 if (alignment_support_scheme == dr_explicit_realign_optimized)
5010 /* Create msq_init = *(floor(p1)) in the loop preheader */
5012 gcc_assert (!compute_in_loop);
5013 pe = loop_preheader_edge (loop_for_initial_load);
5014 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5015 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
5016 &init_addr, &inc, true, NULL_TREE, &inv_p);
5017 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5018 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5019 new_temp = make_ssa_name (vec_dest, new_stmt);
5020 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5021 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5022 gcc_assert (!new_bb);
5023 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
5026 /* 4. Create realignment token using a target builtin, if available.
5027 It is done either inside the containing loop, or before LOOP (as
5028 determined above). */
5030 if (targetm.vectorize.builtin_mask_for_load)
5034 /* Compute INIT_ADDR - the initial address accessed by this memref. */
5035 if (compute_in_loop)
5036 gcc_assert (init_addr); /* already computed by the caller. */
5039 /* Generate the INIT_ADDR computation outside LOOP. */
5040 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5042 pe = loop_preheader_edge (loop);
5043 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
5044 gcc_assert (!new_bb);
5047 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5048 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
5049 vec_dest = vect_create_destination_var (scalar_dest,
5050 TREE_TYPE (new_stmt));
5051 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5052 new_temp = make_ssa_name (vec_dest, new_stmt);
5053 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5055 if (compute_in_loop)
5056 bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
5059 /* Generate the misalignment computation outside LOOP. */
5060 pe = loop_preheader_edge (loop);
5061 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5062 gcc_assert (!new_bb);
5065 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
5067 /* The result of the CALL_EXPR to this builtin is determined from
5068 the value of the parameter and no global variables are touched
5069 which makes the builtin a "const" function. Requiring the
5070 builtin to have the "const" attribute makes it unnecessary
5071 to call mark_call_clobbered. */
5072 gcc_assert (TREE_READONLY (builtin_decl));
/* The asserts after this test require the optimized scheme, so the
   dr_explicit_realign case is expected to leave the function here.
   NOTE(review): the statement controlled by this test is not visible in
   this text - confirm.  */
5075 if (alignment_support_scheme == dr_explicit_realign)
5078 gcc_assert (!compute_in_loop);
5079 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5082 /* 5. Create msq = phi <msq_init, lsq> in loop */
5084 pe = loop_preheader_edge (containing_loop);
5085 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5086 msq = make_ssa_name (vec_dest, NULL_TREE);
5087 phi_stmt = create_phi_node (msq, containing_loop->header);
5088 SSA_NAME_DEF_STMT (msq) = phi_stmt;
/* Only the preheader argument (MSQ_INIT) is added here; the function
   comment leaves the remaining phi argument (lsq) to the caller.  */
5089 add_phi_arg (phi_stmt, msq_init, pe);
5095 /* Function vect_strided_load_supported.
5097 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5098 and FALSE otherwise. */
/* VECTYPE is the vector type to test; only its machine mode (TYPE_MODE) is
   used when querying the optab handlers.  The even and the odd extraction
   are checked one after the other, each in two steps: an optab must exist
   for the tree code, and its handler for MODE must not be CODE_FOR_nothing.
   Each failed check writes its own message to VECT_DUMP when REPORT_DETAILS
   dumping is enabled.  */
5101 vect_strided_load_supported (tree vectype)
5103 optab perm_even_optab, perm_odd_optab;
5106 mode = (int) TYPE_MODE (vectype);
/* VEC_EXTRACT_EVEN_EXPR: optab lookup, then handler check.  */
5108 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
5109 if (!perm_even_optab)
5111 if (vect_print_dump_info (REPORT_DETAILS))
5112 fprintf (vect_dump, "no optab for perm_even.");
5116 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
5118 if (vect_print_dump_info (REPORT_DETAILS))
5119 fprintf (vect_dump, "perm_even op not supported by target.");
/* VEC_EXTRACT_ODD_EXPR: the same two checks.  */
5123 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
5124 if (!perm_odd_optab)
5126 if (vect_print_dump_info (REPORT_DETAILS))
5127 fprintf (vect_dump, "no optab for perm_odd.");
5131 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
5133 if (vect_print_dump_info (REPORT_DETAILS))
5134 fprintf (vect_dump, "perm_odd op not supported by target.");
5141 /* Function vect_permute_load_chain.
5143 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5144 a power of 2, generate extract_even/odd stmts to reorder the input data
5145 correctly. Return the final references for loads in RESULT_CHAIN.
5147 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5148 The input is 4 vectors each containing 8 elements. We assign a number to each
5149 element, the input sequence is:
5151 1st vec: 0 1 2 3 4 5 6 7
5152 2nd vec: 8 9 10 11 12 13 14 15
5153 3rd vec: 16 17 18 19 20 21 22 23
5154 4th vec: 24 25 26 27 28 29 30 31
5156 The output sequence should be:
5158 1st vec: 0 4 8 12 16 20 24 28
5159 2nd vec: 1 5 9 13 17 21 25 29
5160 3rd vec: 2 6 10 14 18 22 26 30
5161 4th vec: 3 7 11 15 19 23 27 31
5163 i.e., the first output vector should contain the first elements of each
5164 interleaving group, etc.
5166 We use extract_even/odd instructions to create such output. The input of each
5167 extract_even/odd operation is two vectors
5171 and the output is the vector of extracted even/odd elements. The output of
5172 extract_even will be: 0 2 4 6
5173 and of extract_odd: 1 3 5 7
5176 The permutation is done in log LENGTH stages. In each stage extract_even and
5177 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5178 order. In our example,
5180 E1: extract_even (1st vec, 2nd vec)
5181 E2: extract_odd (1st vec, 2nd vec)
5182 E3: extract_even (3rd vec, 4th vec)
5183 E4: extract_odd (3rd vec, 4th vec)
5185 The output for the first stage will be:
5187 E1: 0 2 4 6 8 10 12 14
5188 E2: 1 3 5 7 9 11 13 15
5189 E3: 16 18 20 22 24 26 28 30
5190 E4: 17 19 21 23 25 27 29 31
5192 In order to proceed and create the correct sequence for the next stage (or
5193 for the correct output, if the second stage is the last one, as in our
5194 example), we first put the output of extract_even operation and then the
5195 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5196 The input for the second stage is:
5198 1st vec (E1): 0 2 4 6 8 10 12 14
5199 2nd vec (E3): 16 18 20 22 24 26 28 30
5200 3rd vec (E2): 1 3 5 7 9 11 13 15
5201 4th vec (E4): 17 19 21 23 25 27 29 31
5203 The output of the second stage:
5205 E1: 0 4 8 12 16 20 24 28
5206 E2: 2 6 10 14 18 22 26 30
5207 E3: 1 5 9 13 17 21 25 29
5208 E4: 3 7 11 15 19 23 27 31
5210 And RESULT_CHAIN after reordering:
5212 1st vec (E1): 0 4 8 12 16 20 24 28
5213 2nd vec (E3): 1 5 9 13 17 21 25 29
5214 3rd vec (E2): 2 6 10 14 18 22 26 30
5215 4th vec (E4): 3 7 11 15 19 23 27 31. */
/* DR_CHAIN holds the LENGTH input vector defs and *RESULT_CHAIN receives
   the permuted defs.  STMT supplies the vector type (through its
   stmt_vec_info) and, together with BSI, is handed to
   vect_finish_stmt_generation for every extract stmt created here.  */
5218 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
5219 unsigned int length,
5221 block_stmt_iterator *bsi,
5222 VEC(tree,heap) **result_chain)
5224 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
5225 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5230 /* Check that the operation is supported. */
5231 if (!vect_strided_load_supported (vectype))
/* Start *RESULT_CHAIN as a copy of DR_CHAIN; its elements are overwritten
   in place by the VEC_replace calls below.  */
5234 *result_chain = VEC_copy (tree, heap, dr_chain);
/* exact_log2 (LENGTH) stages.  In each stage the adjacent elements J and
   J+1 of DR_CHAIN are combined.  */
5235 for (i = 0; i < exact_log2 (length); i++)
5237 for (j = 0; j < length; j +=2)
5239 first_vect = VEC_index (tree, dr_chain, j);
5240 second_vect = VEC_index (tree, dr_chain, j+1);
5242 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5243 perm_dest = create_tmp_var (vectype, "vect_perm_even");
5244 DECL_GIMPLE_REG_P (perm_dest) = 1;
5245 add_referenced_var (perm_dest);
5247 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
5248 first_vect, second_vect);
5249 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5251 data_ref = make_ssa_name (perm_dest, perm_stmt);
5252 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5253 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5254 mark_symbols_for_renaming (perm_stmt);
/* Even results fill the first half of the result, at slot J/2.  */
5256 VEC_replace (tree, *result_chain, j/2, data_ref);
5258 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5259 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
5260 DECL_GIMPLE_REG_P (perm_dest) = 1;
5261 add_referenced_var (perm_dest);
5263 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
5264 first_vect, second_vect);
5265 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5266 data_ref = make_ssa_name (perm_dest, perm_stmt);
5267 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5268 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5269 mark_symbols_for_renaming (perm_stmt);
/* Odd results fill the second half, at slot J/2 + LENGTH/2.  */
5271 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
/* The output of this stage becomes the input of the next one.
   NOTE(review): this makes a fresh heap copy every time it runs and no
   release of the previous DR_CHAIN copy is visible here - confirm.  */
5273 dr_chain = VEC_copy (tree, heap, *result_chain);
5279 /* Function vect_transform_strided_load.
5281 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5282 to perform their permutation and ascribe the result vectorized statements to
5283 the scalar statements.

   STMT is the scalar statement whose interleaving group is processed; the
   walk over the group starts from DR_GROUP_FIRST_DR of STMT.  SIZE is used
   as the allocation size of RESULT_CHAIN and is also passed, together with
   STMT and BSI, to vect_permute_load_chain.
   NOTE(review): the return type and the statements executed when
   vect_permute_load_chain fails are not visible in this excerpt.
5287 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
5288 block_stmt_iterator *bsi)
5290 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5291 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5292 tree next_stmt, new_stmt;
5293 VEC(tree,heap) *result_chain = NULL;
5294 unsigned int i, gap_count;
5297 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5298 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5299 vectors, that are ready for vector computation. */
5300 result_chain = VEC_alloc (tree, heap, size);
5302 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
5305 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5306 Since we scan the chain starting from its first node, their order
5307 corresponds to the order of data-refs in RESULT_CHAIN. */
5308 next_stmt = first_stmt;
5310 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
5315 /* Skip the gaps. Loads created for the gaps will be removed by dead
5316 code elimination pass later.
5317 DR_GROUP_GAP is the number of steps in elements from the previous
5318 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5319 correspond to the gaps.
5321 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
5329 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5330 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5331 copies, and we put the new vector statement in the first available
5333 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5334 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5337 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5338 tree rel_stmt = STMT_VINFO_RELATED_STMT (
5339 vinfo_for_stmt (prev_stmt));
5342 prev_stmt = rel_stmt;
5343 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5345 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
5347 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5349 /* If NEXT_STMT accesses the same DR as the previous statement,
5350 put the same TMP_DATA_REF as its vectorized statement; otherwise
5351 get the next data-ref from RESULT_CHAIN. */
5352 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5360 /* vectorizable_load.
5362 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
5364 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5365 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5366 Return FALSE if not a vectorizable STMT, TRUE otherwise.

   When VEC_STMT is null only the analysis part runs: STMT is tagged as
   load_vec_info_type and its cost is modelled.  A non-null SLP_NODE selects
   SLP mode; the generated loads are then pushed onto
   SLP_TREE_VEC_STMTS (SLP_NODE).
   NOTE(review): many short lines (braces, returns, else-arms and some
   declarations) are missing from this excerpt, so the exact control flow
   must be confirmed against the complete file.  */
5369 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
5373 tree vec_dest = NULL;
5374 tree data_ref = NULL;
5376 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5377 stmt_vec_info prev_stmt_info;
5378 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5379 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5380 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
5381 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5382 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
5383 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5386 tree new_stmt = NULL_TREE;
5388 enum dr_alignment_support alignment_support_scheme;
5389 tree dataref_ptr = NULL_TREE;
5391 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5392 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5393 int i, j, group_size;
5394 tree msq = NULL_TREE, lsq;
5395 tree offset = NULL_TREE;
5396 tree realignment_token = NULL_TREE;
5397 tree phi = NULL_TREE;
5398 VEC(tree,heap) *dr_chain = NULL;
5399 bool strided_load = false;
5403 bool compute_in_loop = false;
5404 struct loop *at_loop;
5406 bool slp = (slp_node != NULL);
5408 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
5409 this, so we can safely override NCOPIES with 1 here. */
5413 gcc_assert (ncopies >= 1);
5415 /* FORNOW. This restriction should be relaxed. */
5416 if (nested_in_vect_loop && ncopies > 1)
5418 if (vect_print_dump_info (REPORT_DETAILS))
5419 fprintf (vect_dump, "multiple types in nested loop.");
5423 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5426 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5429 /* FORNOW: not yet supported. */
5430 if (STMT_VINFO_LIVE_P (stmt_info))
5432 if (vect_print_dump_info (REPORT_DETAILS))
5433 fprintf (vect_dump, "value used after loop.");
5437 /* Is vectorizable load? */
5438 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5441 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
5442 if (TREE_CODE (scalar_dest) != SSA_NAME)
5445 op = GIMPLE_STMT_OPERAND (stmt, 1);
5446 if (TREE_CODE (op) != ARRAY_REF
5447 && TREE_CODE (op) != INDIRECT_REF
5448 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
5451 if (!STMT_VINFO_DATA_REF (stmt_info))
5454 scalar_type = TREE_TYPE (DR_REF (dr));
5455 mode = (int) TYPE_MODE (vectype);
5457 /* FORNOW. In some cases can vectorize even if data-type not supported
5458 (e.g. - data copies). */
5459 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
5461 if (vect_print_dump_info (REPORT_DETAILS))
5462 fprintf (vect_dump, "Aligned load, but unsupported type.");
5466 /* Check if the load is a part of an interleaving chain. */
5467 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5469 strided_load = true;
5471 gcc_assert (! nested_in_vect_loop);
5473 /* Check if interleaving is supported. */
5474 if (!vect_strided_load_supported (vectype)
5475 && !PURE_SLP_STMT (stmt_info) && !slp)
/* Analysis-time path: tag STMT as a load and account for its cost.  */
5479 if (!vec_stmt) /* transformation not required. */
5481 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5482 vect_model_load_cost (stmt_info, ncopies, NULL);
5486 if (vect_print_dump_info (REPORT_DETAILS))
5487 fprintf (vect_dump, "transform load.");
5493 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5494 /* Check if the chain of loads is already vectorized. */
5495 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
5497 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5500 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5501 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
5502 dr_chain = VEC_alloc (tree, heap, group_size);
5504 /* VEC_NUM is the number of vect stmts to be created for this group. */
5507 strided_load = false;
5508 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5511 vec_num = group_size;
5517 group_size = vec_num = 1;
/* The alignment handling chosen for FIRST_DR selects the code generated
   below; the assert rejects a zero scheme.  */
5520 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
5521 gcc_assert (alignment_support_scheme);
5523 /* In case the vectorization factor (VF) is bigger than the number
5524 of elements that we can fit in a vectype (nunits), we have to generate
5525 more than one vector stmt - i.e - we need to "unroll" the
5526 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5527 from one copy of the vector stmt to the next, in the field
5528 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5529 stages to find the correct vector defs to be used when vectorizing
5530 stmts that use the defs of the current stmt. The example below illustrates
5531 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
5532 4 vectorized stmts):
5534 before vectorization:
5535 RELATED_STMT VEC_STMT
5539 step 1: vectorize stmt S1:
5540 We first create the vector stmt VS1_0, and, as usual, record a
5541 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
5542 Next, we create the vector stmt VS1_1, and record a pointer to
5543 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
5544 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
5546 RELATED_STMT VEC_STMT
5547 VS1_0: vx0 = memref0 VS1_1 -
5548 VS1_1: vx1 = memref1 VS1_2 -
5549 VS1_2: vx2 = memref2 VS1_3 -
5550 VS1_3: vx3 = memref3 - -
5551 S1: x = load - VS1_0
5554 See the documentation of vect_get_vec_def_for_stmt_copy for how the
5555 information we recorded in RELATED_STMT field is used to vectorize
5558 /* In case of interleaving (non-unit strided access):
5565 Vectorized loads are created in the order of memory accesses
5566 starting from the access of the first stmt of the chain:
5569 VS2: vx1 = &base + vec_size*1
5570 VS3: vx3 = &base + vec_size*2
5571 VS4: vx4 = &base + vec_size*3
5573 Then permutation statements are generated:
5575 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
5576 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
5579 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5580 (the order of the data-refs in the output of vect_permute_load_chain
5581 corresponds to the order of scalar stmts in the interleaving chain - see
5582 the documentation of vect_permute_load_chain()).
5583 The generation of permutation stmts and recording them in
5584 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
5586 In case of both multiple types and interleaving, the vector loads and
5587 permutation stmts above are created for every copy. The result vector stmts
5588 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5589 STMT_VINFO_RELATED_STMT for the next copies. */
5591 /* If the data reference is aligned (dr_aligned) or potentially unaligned
5592 on a target that supports unaligned accesses (dr_unaligned_supported)
5593 we generate the following code:
5597 p = p + indx * vectype_size;
5602 Otherwise, the data reference is potentially unaligned on a target that
5603 does not support unaligned accesses (dr_explicit_realign_optimized) -
5604 then generate the following code, in which the data in each iteration is
5605 obtained by two vector loads, one from the previous iteration, and one
5606 from the current iteration:
5608 msq_init = *(floor(p1))
5609 p2 = initial_addr + VS - 1;
5610 realignment_token = call target_builtin;
5613 p2 = p2 + indx * vectype_size
5615 vec_dest = realign_load (msq, lsq, realignment_token)
5620 /* If the misalignment remains the same throughout the execution of the
5621 loop, we can create the init_addr and permutation mask at the loop
5622 preheader. Otherwise, it needs to be created inside the loop.
5623 This can only occur when vectorizing memory accesses in the inner-loop
5624 nested within an outer-loop that is being vectorized. */
5626 if (nested_in_vect_loop_p (loop, stmt)
5627 && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
5629 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5630 compute_in_loop = true;
5633 if ((alignment_support_scheme == dr_explicit_realign_optimized
5634 || alignment_support_scheme == dr_explicit_realign)
5635 && !compute_in_loop)
5637 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token,
5638 alignment_support_scheme, NULL_TREE,
5640 if (alignment_support_scheme == dr_explicit_realign_optimized)
5642 phi = SSA_NAME_DEF_STMT (msq);
5643 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5649 prev_stmt_info = NULL;
/* One outer iteration per vector copy (NCOPIES); each of them emits
   VEC_NUM vector loads in the inner loop.  */
5650 for (j = 0; j < ncopies; j++)
5652 /* 1. Create the vector pointer update chain. */
5654 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
5656 &dummy, &ptr_incr, false,
5660 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
5662 for (i = 0; i < vec_num; i++)
5665 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
5668 /* 2. Create the vector-load in the loop. */
5669 switch (alignment_support_scheme)
5672 gcc_assert (aligned_access_p (first_dr));
5673 data_ref = build_fold_indirect_ref (dataref_ptr);
5675 case dr_unaligned_supported:
5677 int mis = DR_MISALIGNMENT (first_dr);
/* A recorded misalignment of -1 is replaced by zero here (presumably -1
   means "unknown" -- TODO confirm).  */
5678 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
/* Scale by BITS_PER_UNIT before using the value as the second operand of
   MISALIGNED_INDIRECT_REF.  */
5680 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
5682 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
5685 case dr_explicit_realign:
5688 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5690 if (compute_in_loop)
5691 msq = vect_setup_realignment (first_stmt, bsi,
5693 dr_explicit_realign,
5696 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5697 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5698 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5699 new_temp = make_ssa_name (vec_dest, new_stmt);
5700 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5701 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5702 copy_virtual_operands (new_stmt, stmt);
5703 mark_symbols_for_renaming (new_stmt);
/* Second access of the explicit realignment scheme: advance the pointer by
   (number of vector elements - 1) * element size and build another
   ALIGN_INDIRECT_REF through the bumped pointer.  */
5706 bump = size_binop (MULT_EXPR, vs_minus_1,
5707 TYPE_SIZE_UNIT (scalar_type));
5708 ptr = bump_vector_ptr (dataref_ptr, NULL_TREE, bsi, stmt, bump);
5709 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5712 case dr_explicit_realign_optimized:
5713 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5718 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5719 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5720 new_temp = make_ssa_name (vec_dest, new_stmt);
5721 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5722 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5723 mark_symbols_for_renaming (new_stmt);
5725 /* 3. Handle explicit realignment if necessary/supported. Create in
5726 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
5727 if (alignment_support_scheme == dr_explicit_realign_optimized
5728 || alignment_support_scheme == dr_explicit_realign)
5730 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
5731 if (!realignment_token)
5732 realignment_token = dataref_ptr;
5733 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5734 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
5736 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5737 new_temp = make_ssa_name (vec_dest, new_stmt);
5738 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5739 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5741 if (alignment_support_scheme == dr_explicit_realign_optimized)
/* Only the very last load generated (last I of the last J) supplies the
   latch-edge argument of the msq PHI.  */
5743 if (i == vec_num - 1 && j == ncopies - 1)
5744 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
5749 /* 4. Handle invariant-load. */
5752 gcc_assert (!strided_load);
5753 gcc_assert (nested_in_vect_loop_p (loop, stmt));
5758 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
5760 /* CHECKME: bitpos depends on endianness? */
5761 bitpos = bitsize_zero_node;
5762 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
5764 BIT_FIELD_REF_UNSIGNED (vec_inv) =
5765 TYPE_UNSIGNED (scalar_type);
5767 vect_create_destination_var (scalar_dest, NULL_TREE);
5768 new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
5769 new_temp = make_ssa_name (vec_dest, new_stmt);
5770 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5771 vect_finish_stmt_generation (stmt, new_stmt, bsi);
/* Build a list holding NUNITS copies of the extracted scalar and turn it
   into a vector constructor.  */
5773 for (k = nunits - 1; k >= 0; --k)
5774 t = tree_cons (NULL_TREE, new_temp, t);
5775 /* FIXME: use build_constructor directly. */
5776 vec_inv = build_constructor_from_list (vectype, t);
5777 new_temp = vect_init_vector (stmt, vec_inv, vectype, bsi);
5778 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5781 gcc_unreachable (); /* FORNOW. */
5784 /* Collect vector loads and later create their permutation in
5785 vect_transform_strided_load (). */
5787 VEC_quick_push (tree, dr_chain, new_temp);
5789 /* Store vector loads in the corresponding SLP_NODE. */
5791 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5794 /* FORNOW: SLP with multiple types is unsupported. */
5800 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
5802 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5803 dr_chain = VEC_alloc (tree, heap, group_size);
5808 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5810 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5811 prev_stmt_info = vinfo_for_stmt (new_stmt);
5819 /* Function vectorizable_live_operation.
5821 STMT computes a value that is used outside the loop. Check if
5822 it can be supported.

   STMT must already be marked live (asserted below); BSI and VEC_STMT are
   not used.  Every operand of the right-hand side is passed through
   vect_is_simple_use and its definition kind is compared with
   vect_invariant_def and vect_constant_def.
   NOTE(review): the return statements and several declarations are not
   visible in this excerpt; presumably each failing check returns false and
   the function returns true at the end -- confirm against the full file.  */
5825 vectorizable_live_operation (tree stmt,
5826 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
5827 tree *vec_stmt ATTRIBUTE_UNUSED)
5830 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5831 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5832 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5837 enum vect_def_type dt;
5839 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
/* Structural screening: reduction definitions, statements that are not
   GIMPLE_MODIFY_STMTs and statements whose left-hand side is not an
   SSA_NAME are each tested separately.  */
5841 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5844 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5847 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
5850 /* FORNOW. CHECKME. */
5851 if (nested_in_vect_loop_p (loop, stmt))
/* OP_TYPE is the operand count of the right-hand side; it bounds the
   operand loop below.  */
5854 operation = GIMPLE_STMT_OPERAND (stmt, 1);
5855 op_type = TREE_OPERAND_LENGTH (operation);
5857 /* FORNOW: support only if all uses are invariant. This means
5858 that the scalar operations can remain in place, unvectorized.
5859 The original last scalar value that they compute will be used. */
5861 for (i = 0; i < op_type; i++)
5863 op = TREE_OPERAND (operation, i);
5864 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
5866 if (vect_print_dump_info (REPORT_DETAILS))
5867 fprintf (vect_dump, "use not simple.");
5871 if (dt != vect_invariant_def && dt != vect_constant_def)
5875 /* No transformation is required for the cases we currently support. */
5880 /* Function vect_is_simple_cond.
5883 LOOP_VINFO - vectorization info of the loop that is being vectorized.
5884 COND - Condition that is checked for simple use.
5886 Returns whether a COND can be vectorized. Checks whether
5887 condition operands are supportable using vect_is_simple_use.

   COND is first tested with COMPARISON_CLASS_P.  Each of its two operands
   is then either an SSA_NAME, which is checked with vect_is_simple_use, or
   is tested for being an INTEGER_CST, REAL_CST or FIXED_CST.
   NOTE(review): the return statements are missing from this excerpt;
   presumably every failed test returns false -- confirm against the full
   file.  */
5890 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
5894 enum vect_def_type dt;
5896 if (!COMPARISON_CLASS_P (cond))
5899 lhs = TREE_OPERAND (cond, 0);
5900 rhs = TREE_OPERAND (cond, 1);
/* Left operand: SSA name with a simple use, or one of the three constant
   kinds.  */
5902 if (TREE_CODE (lhs) == SSA_NAME)
5904 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5905 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
5908 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5909 && TREE_CODE (lhs) != FIXED_CST)
/* Right operand: same tests as for the left operand.  */
5912 if (TREE_CODE (rhs) == SSA_NAME)
5914 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5915 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
5918 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5919 && TREE_CODE (rhs) != FIXED_CST)
5925 /* vectorizable_condition.
5927 Check if STMT is conditional modify expression that can be vectorized.
5928 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5929 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
   at BSI.
5932 Return FALSE if not a vectorizable STMT, TRUE otherwise.

   The right-hand side of STMT must be a COND_EXPR whose condition passes
   vect_is_simple_cond; the then- and else-clauses are each either an
   SSA_NAME checked with vect_is_simple_use or tested for being an
   INTEGER_CST, REAL_CST or FIXED_CST.  The analysis result comes from
   expand_vec_cond_expr_p on the COND_EXPR and the mode of the vector type.
   NOTE(review): several guarding conditions and return statements are
   missing from this excerpt; confirm the control flow against the full
   file.  */
5935 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
5937 tree scalar_dest = NULL_TREE;
5938 tree vec_dest = NULL_TREE;
5939 tree op = NULL_TREE;
5940 tree cond_expr, then_clause, else_clause;
5941 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5942 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5943 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
5944 tree vec_compare, vec_cond_expr;
5946 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5947 enum machine_mode vec_mode;
5949 enum vect_def_type dt;
5950 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5951 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5953 gcc_assert (ncopies >= 1);
/* NOTE(review): the condition guarding the following return is not visible
   in this excerpt (presumably a test on NCOPIES -- confirm).  */
5955 return false; /* FORNOW */
5957 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5960 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5963 /* FORNOW: SLP not supported. */
5964 if (STMT_SLP_TYPE (stmt_info))
5967 /* FORNOW: not yet supported. */
5968 if (STMT_VINFO_LIVE_P (stmt_info))
5970 if (vect_print_dump_info (REPORT_DETAILS))
5971 fprintf (vect_dump, "value used after loop.");
5975 /* Is vectorizable conditional operation? */
5976 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5979 op = GIMPLE_STMT_OPERAND (stmt, 1);
5981 if (TREE_CODE (op) != COND_EXPR)
/* Split the COND_EXPR into its condition and its two arms.  */
5984 cond_expr = TREE_OPERAND (op, 0);
5985 then_clause = TREE_OPERAND (op, 1);
5986 else_clause = TREE_OPERAND (op, 2);
5988 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
5991 /* We do not handle two different vector types for the condition
5993 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
5996 if (TREE_CODE (then_clause) == SSA_NAME)
5998 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
5999 if (!vect_is_simple_use (then_clause, loop_vinfo,
6000 &then_def_stmt, &def, &dt))
6003 else if (TREE_CODE (then_clause) != INTEGER_CST
6004 && TREE_CODE (then_clause) != REAL_CST
6005 && TREE_CODE (then_clause) != FIXED_CST)
6008 if (TREE_CODE (else_clause) == SSA_NAME)
6010 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6011 if (!vect_is_simple_use (else_clause, loop_vinfo,
6012 &else_def_stmt, &def, &dt))
6015 else if (TREE_CODE (else_clause) != INTEGER_CST
6016 && TREE_CODE (else_clause) != REAL_CST
6017 && TREE_CODE (else_clause) != FIXED_CST)
6021 vec_mode = TYPE_MODE (vectype);
6025 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6026 return expand_vec_cond_expr_p (op, vec_mode);
6032 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
6033 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6035 /* Handle cond expr. */
/* Vector defs are requested for both comparison operands and for the two
   arms.  */
6037 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
6039 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
6040 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
6041 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
6043 /* Arguments are ready. create the new vector stmt. */
/* The comparison keeps the tree code of the scalar condition but is built
   on the vector type; it becomes the first operand of the VEC_COND_EXPR.  */
6044 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
6045 vec_cond_lhs, vec_cond_rhs);
6046 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6047 vec_compare, vec_then_clause, vec_else_clause);
6049 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
6050 new_temp = make_ssa_name (vec_dest, *vec_stmt);
6051 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
6052 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
6058 /* Function vect_transform_stmt.
6060 Create a vectorized stmt to replace STMT, and insert it at BSI.

   The vectorizable_* routine is selected from STMT_VINFO_TYPE of STMT.  For
   the kinds whose routine takes no SLP node (type demotion, type promotion,
   induction, condition, call, reduction) SLP_NODE is asserted to be null.
   *STRIDED_STORE is set to true for a store with STMT_VINFO_STRIDED_ACCESS.
   A live STMT whose kind is not reduc_vec_info_type is additionally passed
   to vectorizable_live_operation.  The vector statement is recorded in
   STMT_VINFO_VEC_STMT of STMT and, when the related statement of STMT is
   marked as being in a pattern, in the stmt_info of that statement as well.
   NOTE(review): break/return statements and some guards are missing from
   this excerpt; confirm the control flow against the full file.  */
6063 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store,
6066 bool is_store = false;
6067 tree vec_stmt = NULL_TREE;
6068 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6069 tree orig_stmt_in_pattern;
/* Dispatch on the statement kind stored in the stmt_info.  */
6072 switch (STMT_VINFO_TYPE (stmt_info))
6074 case type_demotion_vec_info_type:
6075 gcc_assert (!slp_node);
6076 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
6080 case type_promotion_vec_info_type:
6081 gcc_assert (!slp_node);
6082 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
6086 case type_conversion_vec_info_type:
6087 done = vectorizable_conversion (stmt, bsi, &vec_stmt, slp_node);
6091 case induc_vec_info_type:
6092 gcc_assert (!slp_node);
6093 done = vectorizable_induction (stmt, bsi, &vec_stmt);
6097 case op_vec_info_type:
6098 done = vectorizable_operation (stmt, bsi, &vec_stmt, slp_node);
6102 case assignment_vec_info_type:
6103 done = vectorizable_assignment (stmt, bsi, &vec_stmt, slp_node);
6107 case load_vec_info_type:
6108 done = vectorizable_load (stmt, bsi, &vec_stmt, slp_node);
6112 case store_vec_info_type:
6113 done = vectorizable_store (stmt, bsi, &vec_stmt, slp_node);
6115 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6117 /* In case of interleaving, the whole chain is vectorized when the
6118 last store in the chain is reached. Store stmts before the last
6119 one are skipped, and their vec_stmt_info shouldn't be freed
6121 *strided_store = true;
6122 if (STMT_VINFO_VEC_STMT (stmt_info))
6129 case condition_vec_info_type:
6130 gcc_assert (!slp_node);
6131 done = vectorizable_condition (stmt, bsi, &vec_stmt);
6135 case call_vec_info_type:
6136 gcc_assert (!slp_node);
6137 done = vectorizable_call (stmt, bsi, &vec_stmt);
6140 case reduc_vec_info_type:
6141 gcc_assert (!slp_node);
6142 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
6147 if (!STMT_VINFO_LIVE_P (stmt_info))
6149 if (vect_print_dump_info (REPORT_DETAILS))
6150 fprintf (vect_dump, "stmt not supported.");
6155 if (STMT_VINFO_LIVE_P (stmt_info)
6156 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6158 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
6164 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6165 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
6166 if (orig_stmt_in_pattern)
6168 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
6169 /* STMT was inserted by the vectorizer to replace a computation idiom.
6170 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
6171 computed this idiom. We need to record a pointer to VEC_STMT in
6172 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
6173 documentation of vect_pattern_recog. */
6174 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
6176 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
6177 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
6186 /* This function builds ni_name = number of iterations loop executes
6187 on the loop preheader.

   An unshared copy of LOOP_VINFO_NITERS (LOOP_VINFO) is gimplified into a
   temporary created with the name "niters"; statements produced by the
   gimplification are inserted on the preheader edge of the loop, and the
   insertion must not create a new basic block (asserted).
   NOTE(review): the return statement and the guard around the insertion
   are not visible in this excerpt; presumably NI_NAME is returned and the
   insertion only happens when a statement was produced -- confirm.  */
6190 vect_build_loop_niters (loop_vec_info loop_vinfo)
6192 tree ni_name, stmt, var;
6194 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
/* Work on a private copy so the expression kept in LOOP_VINFO is not
   shared with the gimplified one.  */
6195 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6197 var = create_tmp_var (TREE_TYPE (ni), "niters");
6198 add_referenced_var (var);
6199 ni_name = force_gimple_operand (ni, &stmt, false, var);
6201 pe = loop_preheader_edge (loop);
6204 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6205 gcc_assert (!new_bb);
6212 /* This function generates the following statements:
6214 ni_name = number of iterations loop executes
6215 ratio = ni_name / vf
6216 ratio_mult_vf_name = ratio * vf
6218 and places them at the loop preheader edge.

   The division and the multiplication are built as a right shift and a
   left shift by exact_log2 (vf); this presumably relies on vf being a
   power of two -- TODO confirm.  Each shift result that is not already a
   gimple value is gimplified into a temporary ("bnd", "ratio_mult_vf") and
   the resulting statements are inserted on the preheader edge, which must
   not create a new basic block (asserted).  The three results are stored
   through NI_NAME_PTR, RATIO_MULT_VF_NAME_PTR and RATIO_NAME_PTR.  */
6221 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6223 tree *ratio_mult_vf_name_ptr,
6224 tree *ratio_name_ptr)
6232 tree ratio_mult_vf_name;
6233 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6234 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
6235 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6238 pe = loop_preheader_edge (loop);
6240 /* Generate temporary variable that contains
6241 number of iterations loop executes. */
6243 ni_name = vect_build_loop_niters (loop_vinfo);
/* Shift count shared by both shifts below, built in the type of NI.  */
6244 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
6246 /* Create: ratio = ni >> log2(vf) */
6248 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
6249 if (!is_gimple_val (ratio_name))
6251 var = create_tmp_var (TREE_TYPE (ni), "bnd");
6252 add_referenced_var (var);
6254 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
6255 pe = loop_preheader_edge (loop);
6256 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6257 gcc_assert (!new_bb);
6260 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6262 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6263 ratio_name, log_vf);
6264 if (!is_gimple_val (ratio_mult_vf_name))
6266 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
6267 add_referenced_var (var);
6269 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
6271 pe = loop_preheader_edge (loop);
6272 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6273 gcc_assert (!new_bb);
/* Hand the three computed names back to the caller.  */
6276 *ni_name_ptr = ni_name;
6277 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6278 *ratio_name_ptr = ratio_name;
6284 /* Function vect_update_ivs_after_vectorizer.
6286 "Advance" the induction variables of LOOP to the value they should take
6287 after the execution of LOOP. This is currently necessary because the
6288 vectorizer does not handle induction variables that are used after the
6289 loop. Such a situation occurs when the last iterations of LOOP are
6291 1. We introduced new uses after LOOP for IVs that were not originally used
6292 after LOOP: the IVs of LOOP are now used by an epilog loop.
6293 2. LOOP is going to be vectorized; this means that it will iterate N/VF
6294 times, whereas the loop IVs should be bumped N times.
6297 - LOOP - a loop that is going to be vectorized. The last few iterations
6298 of LOOP were peeled.
6299 - NITERS - the number of iterations that LOOP executes (before it is
6300 vectorized). i.e, the number of times the ivs should be bumped.
6301 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
6302 coming out from LOOP on which there are uses of the LOOP ivs
6303 (this is the path from LOOP->exit to epilog_loop->preheader).
6305 The new definitions of the ivs are placed in LOOP->exit.
6306 The phi args associated with the edge UPDATE_E in the bb
6307 UPDATE_E->dest are updated accordingly.
6309 Assumption 1: Like the rest of the vectorizer, this function assumes
6310 a single loop exit that has a single predecessor.
6312 Assumption 2: The phi nodes in the LOOP header and in update_bb are
6313 organized in the same order.
6315 Assumption 3: The access function of the ivs is simple enough (see
6316 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
6318 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
6319 coming out of LOOP on which the ivs of LOOP are used (this is the path
6320 that leads to the epilog loop; other paths skip the epilog loop). This
6321 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
6322 needs to have its phis updated.
6326 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
6329 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6330 basic_block exit_bb = single_exit (loop)->dest;
6332 basic_block update_bb = update_e->dest;
6334 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
6336 /* Make sure there exists a single-predecessor exit bb: */
6337 gcc_assert (single_pred_p (exit_bb));
/* Walk the PHIs of the loop header and of UPDATE_BB in lockstep (see
   Assumption 2 above).  */
6339 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
6341 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
6343 tree access_fn = NULL;
6344 tree evolution_part;
6347 tree var, ni, ni_name;
6348 block_stmt_iterator last_bsi;
6350 if (vect_print_dump_info (REPORT_DETAILS))
6352 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
6353 print_generic_expr (vect_dump, phi, TDF_SLIM);
6356 /* Skip virtual phi's. */
6357 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
6359 if (vect_print_dump_info (REPORT_DETAILS))
6360 fprintf (vect_dump, "virtual phi. skip.");
6364 /* Skip reduction phis. */
6365 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
6367 if (vect_print_dump_info (REPORT_DETAILS))
6368 fprintf (vect_dump, "reduc phi. skip.");
/* The scalar evolution of the PHI result supplies the step (its evolution
   part in this loop) and the initial value used below.  */
6372 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
6373 gcc_assert (access_fn);
6375 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
6376 gcc_assert (evolution_part != NULL_TREE);
6378 /* FORNOW: We do not support IVs whose evolution function is a polynomial
6379 of degree >= 2 or exponential. */
6380 gcc_assert (!tree_is_chrec (evolution_part));
6382 step_expr = evolution_part;
6383 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
/* Value of the IV after NITERS iterations.  Pointer IVs use
   POINTER_PLUS_EXPR with NITERS * STEP_EXPR converted to sizetype; other
   IVs use PLUS_EXPR in the type of INIT_EXPR (that arm is only partially
   visible in this excerpt).  */
6386 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
6387 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
6389 fold_convert (sizetype,
6390 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
6391 niters, step_expr)));
6393 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
6394 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
6395 fold_convert (TREE_TYPE (init_expr),
6402 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
6403 add_referenced_var (var);
/* Gimplify NI using an iterator positioned at the last statement of
   EXIT_BB.  */
6405 last_bsi = bsi_last (exit_bb);
6406 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
6407 true, BSI_SAME_STMT);
6409 /* Fix phi expressions in the successor bb. */
6410 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
6415 /* Function vect_do_peeling_for_loop_bound
6417 Peel the last iterations of the loop represented by LOOP_VINFO.
6418 The peeled iterations form a new epilog loop. Given that the loop now
6419 iterates NITERS times, the new epilog loop iterates
6420 NITERS % VECTORIZATION_FACTOR times.
6422 The original loop will later be made to iterate
6423 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
/* LOOP_VINFO - the loop_vec_info of the loop to peel.
   RATIO - output; it is handed to vect_generate_tmps_on_preheader, which
   is expected to store the NITERS / VF value described above in it.  */
6426 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
6428 tree ni_name, ratio_mult_vf_name;
6429 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6430 struct loop *new_loop;
6432 basic_block preheader;
6435 int min_scalar_loop_bound;
6436 int min_profitable_iters;
6438 if (vect_print_dump_info (REPORT_DETAILS))
6439 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
6441 initialize_original_copy_tables ();
6443 /* Generate the following variables on the preheader of original loop:
6445 ni_name = number of iteration the original loop executes
6446 ratio = ni_name / vf
6447 ratio_mult_vf_name = ratio * vf */
6448 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
6449 &ratio_mult_vf_name, ratio);
/* Record the loop number so that the assertion after peeling can check
   that LOOP kept its identity.  */
6451 loop_num = loop->num;
6453 /* Analyze cost to set threshold for vectorized loop. */
6454 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
/* User-specified bound: PARAM_MIN_VECT_LOOP_BOUND scaled by the
   vectorization factor.  */
6455 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND))
6456 * LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6458 /* Use the cost model only if it is more conservative than user specified
6461 th = (unsigned) min_scalar_loop_bound;
6462 if (min_profitable_iters
6463 && (!min_scalar_loop_bound
6464 || min_profitable_iters > min_scalar_loop_bound))
6465 th = (unsigned) min_profitable_iters;
6467 if (min_profitable_iters
6468 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
6469 && vect_print_dump_info (REPORT_DETAILS))
6470 fprintf (vect_dump, "vectorization may not be profitable.");
6472 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
6473 ratio_mult_vf_name, ni_name, false,
6475 gcc_assert (new_loop);
6476 gcc_assert (loop_num == loop->num);
6477 #ifdef ENABLE_CHECKING
6478 slpeel_verify_cfg_after_peeling (loop, new_loop);
6481 /* A guard that controls whether the new_loop is to be executed or skipped
6482 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
6483 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
6484 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
6485 is on the path where the LOOP IVs are used and need to be updated. */
/* PREHEADER is the source block of NEW_LOOP's preheader edge.  Of its
   first two predecessor edges, the one whose source is the destination of
   LOOP's single exit is the edge on which the IVs are updated.  */
6487 preheader = loop_preheader_edge (new_loop)->src;
6488 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
6489 update_e = EDGE_PRED (preheader, 0);
6491 update_e = EDGE_PRED (preheader, 1);
6493 /* Update IVs of original loop as if they were advanced
6494 by ratio_mult_vf_name steps. */
6495 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
6497 /* After peeling we have to reset scalar evolution analyzer. */
6500 free_original_copy_tables ();
6504 /* Function vect_gen_niters_for_prolog_loop
6506 Set the number of iterations for the loop represented by LOOP_VINFO
6507 to the minimum between LOOP_NITERS (the original iteration count of the loop)
6508 and the misalignment of DR - the data reference recorded in
6509 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
6510 this loop, the data reference DR will refer to an aligned location.
6512 The following computation is generated:
6514 If the misalignment of DR is known at compile time:
6515 addr_mis = int mis = DR_MISALIGNMENT (dr);
6516 Else, compute address misalignment in bytes:
6517 addr_mis = addr & (vectype_size - 1)
6519 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
6521 (elem_size = element type size; an element is the scalar element
6522 whose type is the inner type of the vectype)
6526 prolog_niters = min ( LOOP_NITERS ,
6527 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
6528 where group_size is the size of the interleaved group.
6530 The above formulas assume that VF == number of elements in the vector. This
6531 may not hold when there are multiple-types in the loop.
6532 In this case, for some data-references in the loop the VF does not represent
6533 the number of elements that fit in the vector. Therefore, instead of VF we
6534 use TYPE_VECTOR_SUBPARTS. */
/* LOOP_VINFO - supplies the unaligned data reference, the loop and the
   statically known peeling amount (LOOP_PEELING_FOR_ALIGNMENT).
   LOOP_NITERS - tree holding the iteration count of the loop; its type is
   the type in which the prolog iteration count is built.
   The computed count is gimplified into a "prolog_loop_niters" temporary
   and the statements computing it are inserted on the loop preheader
   edge.  */
6537 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
6539 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
6540 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6542 tree iters, iters_name;
6545 tree dr_stmt = DR_STMT (dr);
6546 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
6547 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
/* TYPE_ALIGN is divided by BITS_PER_UNIT to express the vector type
   alignment in bytes.  */
6548 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
6549 tree niters_type = TREE_TYPE (loop_niters);
6551 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
6552 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
6554 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6556 /* For interleaved access element size must be multiplied by the size of
6557 the interleaved group. */
6558 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
6559 DR_GROUP_FIRST_DR (stmt_info)));
6560 element_size *= group_size;
6563 pe = loop_preheader_edge (loop);
/* Misalignment known at compile time: LOOP_PEELING_FOR_ALIGNMENT holds it
   in bytes, so the prolog count folds to an integer constant.  */
6565 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
6567 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
6568 int elem_misalign = byte_misalign / element_size;
6570 if (vect_print_dump_info (REPORT_DETAILS))
6571 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
6572 iters = build_int_cst (niters_type,
6573 (nelements - elem_misalign)&(nelements/group_size-1));
/* Misalignment not known at compile time: build the address of the first
   access and derive the count from its low bits at run time.
   NOTE(review): unlike the constant case above, the mask built below is
   nelements - 1 and GROUP_SIZE does not appear - confirm this is intended
   for interleaved accesses.  */
6577 tree new_stmts = NULL_TREE;
6578 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
6579 &new_stmts, NULL_TREE, loop);
6580 tree ptr_type = TREE_TYPE (start_addr);
6581 tree size = TYPE_SIZE (ptr_type);
/* Integer type with the same bit width as the pointer type; the second
   argument (UNSIGNEDP) is 1.  */
6582 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
6583 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
/* log2 of (vector alignment in bytes / number of vector elements); used
   below as the shift count that divides the byte misalignment by the
   element size.  */
6584 tree elem_size_log =
6585 build_int_cst (type, exact_log2 (vectype_align/nelements));
6586 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
6587 tree nelements_tree = build_int_cst (type, nelements);
/* Emit the address computation on the preheader edge; the insertion is
   asserted not to create a new basic block.  */
6591 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
6592 gcc_assert (!new_bb);
6594 /* Create: byte_misalign = addr & (vectype_size - 1) */
6596 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
6598 /* Create: elem_misalign = byte_misalign / element_size */
6600 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
6602 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
6603 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
6604 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
6605 iters = fold_convert (niters_type, iters);
6608 /* Create: prolog_loop_niters = min (iters, loop_niters) */
6609 /* If the loop bound is known at compile time we already verified that it is
6610 greater than vf; since the misalignment ('iters') is at most vf, there's
6611 no need to generate the MIN_EXPR in this case. */
6612 if (TREE_CODE (loop_niters) != INTEGER_CST)
6613 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
6615 if (vect_print_dump_info (REPORT_DETAILS))
6617 fprintf (vect_dump, "niters for prolog loop: ");
6618 print_generic_expr (vect_dump, iters, TDF_SLIM);
/* Gimplify ITERS into the temporary VAR; STMT receives whatever
   statements force_gimple_operand produces for it.  */
6621 var = create_tmp_var (niters_type, "prolog_loop_niters");
6622 add_referenced_var (var);
6623 iters_name = force_gimple_operand (iters, &stmt, false, var);
6625 /* Insert stmt on loop preheader edge. */
6628 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6629 gcc_assert (!new_bb);
6636 /* Function vect_update_init_of_dr
6638 NITERS iterations were peeled from LOOP. DR represents a data reference
6639 in LOOP. This function updates the information recorded in DR to
6640 account for the fact that the first NITERS iterations had already been
6641 executed. Specifically, it updates the OFFSET field of DR. */
/* DR - data reference whose DR_OFFSET is rewritten in place.
   NITERS - number of peeled iterations, as a tree.  */
6644 vect_update_init_of_dr (struct data_reference *dr, tree niters)
6646 tree offset = DR_OFFSET (dr);
/* The advance is NITERS * DR_STEP, built in the type of NITERS; it is then
   added to the old offset in the offset's own type.  */
6648 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
6649 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
6650 DR_OFFSET (dr) = offset;
6654 /* Function vect_update_inits_of_drs
6656 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
6657 This function updates the information recorded for the data references in
6658 the loop to account for the fact that the first NITERS iterations had
6659 already been executed. Specifically, it updates the initial_condition of
6660 the access_function of all the data_references in the loop. */
/* In the code below this amounts to calling vect_update_init_of_dr, with
   the same NITERS, on every entry of LOOP_VINFO_DATAREFS (LOOP_VINFO).  */
6663 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
6666 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
6667 struct data_reference *dr;
6669 if (vect_print_dump_info (REPORT_DETAILS))
6670 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
/* VEC_iterate stores element I in DR and ends the loop once I runs past
   the last data reference.  */
6672 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
6673 vect_update_init_of_dr (dr, niters);
6677 /* Function vect_do_peeling_for_alignment
6679 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
6680 'niters' is set to the misalignment of one of the data references in the
6681 loop, thereby forcing it to refer to an aligned location at the beginning
6682 of the execution of this loop. The data reference for which we are
6683 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
/* LOOP_VINFO - the loop to peel.  Besides creating the prolog loop, this
   function subtracts the prolog iteration count from
   LOOP_VINFO_NITERS (LOOP_VINFO) and updates the recorded data
   references through vect_update_inits_of_drs.  */
6686 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
6688 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6689 tree niters_of_prolog_loop, ni_name;
6691 struct loop *new_loop;
6693 if (vect_print_dump_info (REPORT_DETAILS))
6694 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
6696 initialize_original_copy_tables ();
/* NI_NAME is the loop iteration count built by vect_build_loop_niters; the
   prolog iteration count is derived from it.  */
6698 ni_name = vect_build_loop_niters (loop_vinfo);
6699 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
6701 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
/* The peel is done on the loop preheader edge, which is why the CFG check
   below is given NEW_LOOP first and LOOP second.  */
6703 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
6704 niters_of_prolog_loop, ni_name, true, 0);
6705 gcc_assert (new_loop);
6706 #ifdef ENABLE_CHECKING
6707 slpeel_verify_cfg_after_peeling (new_loop, loop);
6710 /* Update number of times loop executes. */
6711 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
6712 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
6713 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
6715 /* Update the init conditions of the access functions of all data refs. */
6716 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
6718 /* After peeling we have to reset scalar evolution analyzer. */
6721 free_original_copy_tables ();
6725 /* Function vect_create_cond_for_align_checks.
6727 Create a conditional expression that represents the alignment checks for
6728 all of data references (array element references) whose alignment must be
6732 LOOP_VINFO - two fields of the loop information are used.
6733 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
6734 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
6737 COND_EXPR_STMT_LIST - statements needed to construct the conditional
6739 The returned value is the conditional expression to be used in the if
6740 statement that controls which version of the loop gets executed at runtime.
6742 The algorithm makes two assumptions:
6743 1) The number of bytes "n" in a vector is a power of 2.
6744 2) An address "a" is aligned if a%n is zero and that this
6745 test can be done as a&(n-1) == 0. For example, for 16
6746 byte vectors the test is a&0xf == 0. */
/* The value returned is an EQ_EXPR of boolean type that compares
   (bitwise OR of all checked addresses) & MASK against zero.  Every
   statement built along the way is appended to *COND_EXPR_STMT_LIST.  */
6749 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
6750 tree *cond_expr_stmt_list)
6752 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6753 VEC(tree,heap) *may_misalign_stmts
6754 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
6756 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
6760 tree int_ptrsize_type;
6762 tree or_tmp_name = NULL_TREE;
6763 tree and_tmp, and_tmp_name, and_stmt;
6766 /* Check that mask is one less than a power of 2, i.e., mask is
6767 all zeros followed by all ones. */
6768 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
6770 /* CHECKME: what is the best integer or unsigned type to use to hold a
6771 cast from a pointer value? */
6772 psize = TYPE_SIZE (ptr_type_node);
6774 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
/* The second argument (UNSIGNEDP) above is 0, so the integer type
   requested has pointer width and is signed.  */
6776 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
6777 of the first vector of the i'th data reference. */
6779 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
6781 tree new_stmt_list = NULL_TREE;
6783 tree addr_tmp, addr_tmp_name, addr_stmt;
6784 tree or_tmp, new_or_tmp_name, or_stmt;
6786 /* create: addr_tmp = (int)(address_of_first_vector) */
6787 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
6788 &new_stmt_list, NULL_TREE, loop);
6790 if (new_stmt_list != NULL_TREE)
6791 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
/* NOTE(review): the declaration of TMP_NAME is not visible here; confirm
   the buffer is large enough for "addr2int"/"orptrs" followed by the
   decimal index I.  */
6793 sprintf (tmp_name, "%s%d", "addr2int", i);
6794 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6795 add_referenced_var (addr_tmp);
6796 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
6797 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
6798 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
6799 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
6800 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
6802 /* The addresses are ORed together. */
6804 if (or_tmp_name != NULL_TREE)
6806 /* create: or_tmp = or_tmp | addr_tmp */
6807 sprintf (tmp_name, "%s%d", "orptrs", i);
6808 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6809 add_referenced_var (or_tmp);
6810 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
6811 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
6812 or_tmp_name, addr_tmp_name);
6813 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
6814 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
6815 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
6816 or_tmp_name = new_or_tmp_name;
/* Case of the first address (OR_TMP_NAME still NULL_TREE): there is
   nothing to OR with yet, so the address itself starts the chain.  */
6819 or_tmp_name = addr_tmp_name;
6823 mask_cst = build_int_cst (int_ptrsize_type, mask);
6825 /* create: and_tmp = or_tmp & mask */
6826 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
6827 add_referenced_var (and_tmp);
6828 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
6830 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
6831 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
6832 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
6833 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
6835 /* Make and_tmp the left operand of the conditional test against zero.
6836 if and_tmp has a nonzero bit then some address is unaligned. */
6837 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
6838 return build2 (EQ_EXPR, boolean_type_node,
6839 and_tmp_name, ptrsize_zero);
/* Summary of the code visible for vect_vfa_segment_size: the function
   tests whether vect_supportable_dr_alignment (DR) equals
   dr_explicit_realign_optimized.  In that case a constant holding the byte
   size of the vector mode of DR's statement is built and the segment
   length is a PLUS_EXPR over a MULT_EXPR on DR_STEP (presumably
   DR_STEP * VECT_FACTOR plus that constant - confirm against the full
   source).  Otherwise only the MULT_EXPR on DR_STEP is used.  In both
   cases the arithmetic is built in integer_type_node and the result is
   converted to sizetype before being returned in SEGMENT_LENGTH.  */
6842 /* Function vect_vfa_segment_size.
6844 Create an expression that computes the size of segment
6845 that will be accessed for a data reference. The functions takes into
6846 account that realignment loads may access one more vector.
6849 DR: The data reference.
6850 VECT_FACTOR: vectorization factor.
6852 Return an expression whose value is the size of segment which will be
6856 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
6858 tree segment_length;
6860 if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
6863 build_int_cst (integer_type_node,
6864 GET_MODE_SIZE (TYPE_MODE (STMT_VINFO_VECTYPE
6865 (vinfo_for_stmt (DR_STMT (dr))))));
6868 fold_convert (sizetype,
6869 fold_build2 (PLUS_EXPR, integer_type_node,
6870 fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
6877 fold_convert (sizetype,
6878 fold_build2 (MULT_EXPR, integer_type_node, DR_STEP (dr),
6882 return segment_length;
6885 /* Function vect_create_cond_for_alias_checks.
6887 Create a conditional expression that represents the run-time checks for
6888 overlapping of address ranges represented by a list of data references
6889 relations passed as input.
6892 COND_EXPR - input conditional expression. New conditions will be chained
6893 with logical and operation.
6894 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
6898 COND_EXPR - conditional expression.
6899 COND_EXPR_STMT_LIST - statements needed to construct the conditional
6901 The returned value is the conditional expression to be used in the if
6902 statement that controls which version of the loop gets executed at runtime.
6906 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
6908 tree * cond_expr_stmt_list)
6910 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6911 VEC (ddr_p, heap) * may_alias_ddrs =
6912 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
6914 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
6918 tree part_cond_expr;
6920 /* Create expression
6921 ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
6922 || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
6926 ((store_ptr_n + store_segment_length_n) < load_ptr_n)
6927 || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
/* The list of DDRs is read from LOOP_VINFO_MAY_ALIAS_DDRS; the emptiness
   test below handles the case where there is nothing to check.  */
6929 if (VEC_empty (ddr_p, may_alias_ddrs))
/* Build one non-overlap test per data dependence relation.  */
6932 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
6934 tree stmt_a = DR_STMT (DDR_A (ddr));
6935 tree stmt_b = DR_STMT (DDR_B (ddr));
/* Base addresses of the two references; statements needed to compute them
   go to COND_EXPR_STMT_LIST.  */
6938 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
6941 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
/* Sizes of the address ranges accessed through each reference.  */
6944 tree segment_length_a = vect_vfa_segment_size (DDR_A (ddr), vect_factor);
6945 tree segment_length_b = vect_vfa_segment_size (DDR_B (ddr), vect_factor);
6947 if (vect_print_dump_info (REPORT_DR_DETAILS))
6950 "create runtime check for data references ");
6951 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
6952 fprintf (vect_dump, " and ");
6953 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
/* The test for this pair is a TRUTH_OR_EXPR of two LT_EXPR comparisons,
   each applied to a base address advanced with POINTER_PLUS_EXPR.  */
6958 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
6959 fold_build2 (LT_EXPR, boolean_type_node,
6960 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
6964 fold_build2 (LT_EXPR, boolean_type_node,
6965 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
/* Chain the new test onto the condition accumulated in *COND_EXPR with
   TRUTH_AND_EXPR; when there is none yet, the new test becomes the
   condition.  */
6971 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
6972 *cond_expr, part_cond_expr);
6974 *cond_expr = part_cond_expr;
/* The dump line reports how many DDRs were turned into run-time checks.  */
6976 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
6977 fprintf (vect_dump, "created %u versioning for alias checks.\n",
6978 VEC_length (ddr_p, may_alias_ddrs));
/* vect_remove_stores: FIRST_STMT is the first statement of the group.  For
   each statement handled, the statement is removed from its block, its
   stmt_vec_info is freed and the annotation's stmt info is reset to NULL;
   the successor of a statement is read from DR_GROUP_NEXT_DR.  */
6982 /* Remove a group of stores (for SLP or interleaving), free their
6986 vect_remove_stores (tree first_stmt)
6989 tree next = first_stmt;
6991 stmt_vec_info next_stmt_info;
6992 block_stmt_iterator next_si;
6996 /* Free the attached stmt_vec_info and remove the stmt. */
6997 next_si = bsi_for_stmt (next);
6998 bsi_remove (&next_si, true);
6999 next_stmt_info = vinfo_for_stmt (next);
7000 ann = stmt_ann (next);
/* Fetch the next group member before freeing the info that holds the
   link.  */
7001 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7002 free (next_stmt_info);
7003 set_stmt_info (ann, NULL);
7009 /* Vectorize SLP instance tree in postorder. */
/* NODE - SLP tree node to vectorize.
   VEC_STMTS_SIZE - number of vector statements to create for the node; it
   sizes SLP_TREE_VEC_STMTS and is stored in SLP_TREE_NUMBER_OF_VEC_STMTS.
   The same value is passed down to both children.  */
7012 vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
7015 bool strided_store, is_store;
7016 block_stmt_iterator si;
7017 stmt_vec_info stmt_info;
/* Postorder: both children are scheduled before the node itself.  */
7022 vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
7023 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
/* The first scalar statement of the node is the one handed to
   vect_transform_stmt, together with NODE.  */
7025 stmt = VEC_index(tree, SLP_TREE_SCALAR_STMTS (node), 0);
7026 stmt_info = vinfo_for_stmt (stmt);
7027 SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size);
7028 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
7030 if (vect_print_dump_info (REPORT_DETAILS))
7032 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
7033 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7036 si = bsi_for_stmt (stmt);
7037 is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
7040 if (DR_GROUP_FIRST_DR (stmt_info))
7041 /* If IS_STORE is TRUE, the vectorization of the
7042 interleaving chain was completed - free all the stores in
7044 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
7046 /* FORNOW: SLP originates only from strided stores. */
7052 /* FORNOW: SLP originates only from strided stores. */
/* Schedule every SLP instance recorded in LOOP_VINFO_SLP_INSTANCES
   (LOOP_VINFO) by calling vect_schedule_slp_instance on the instance's
   tree.
   NUNITS - divisor used when computing how many vector statements each
   node needs (vectorization factor * group size / NUNITS).  */
7058 vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
7060 VEC (slp_instance, heap) *slp_instances =
7061 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
7062 slp_instance instance;
7063 unsigned int vec_stmts_size;
7064 unsigned int group_size, i;
7065 unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7066 bool is_store = false;
/* One pass per SLP instance; IS_STORE keeps the result of the most recent
   scheduling call.  */
7068 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
7070 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
7071 /* For each SLP instance calculate number of vector stmts to be created
7072 for the scalar stmts in each node of the SLP tree. Number of vector
7073 elements in one vector iteration is the number of scalar elements in
7074 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
7076 vec_stmts_size = vectorization_factor * group_size / nunits;
7078 /* Schedule the tree of INSTANCE. */
7079 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
/* The message is emitted when either report level is enabled.  */
7082 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
7083 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
7084 fprintf (vect_dump, "vectorizing stmts using SLP.");
7091 /* Function vect_transform_loop.
7093 The analysis phase has determined that the loop is vectorizable.
7094 Vectorize the loop - created vectorized stmts to replace the scalar
7095 stmts in the loop, and update the loop exit condition. */
/* LOOP_VINFO - analysis results for the loop to transform.  The steps
   visible below are: optional loop versioning guarded by run-time
   alignment and alias checks, optional peeling for alignment, peeling for
   the loop bound or a constant computation of RATIO, transformation of the
   phis and statements of every basic block of the loop, and finally a call
   to slpeel_make_loop_iterate_ntimes with RATIO followed by an SSA
   update.  */
7098 vect_transform_loop (loop_vec_info loop_vinfo)
7100 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7101 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7102 int nbbs = loop->num_nodes;
7103 block_stmt_iterator si, next_si;
7106 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7108 bool slp_scheduled = false;
7109 unsigned int nunits;
7111 if (vect_print_dump_info (REPORT_DETAILS))
7112 fprintf (vect_dump, "=== vec_transform_loop ===");
7114 /* If the loop has data references that may or may not be aligned or/and
7115 has data reference relations whose independence was not proven then
7116 two versions of the loop need to be generated, one which is vectorized
7117 and one which isn't. A test is then generated to control which of the
7118 loops is executed. The test checks for the alignment of all of the
7119 data references that may or may not be aligned. An additional
7120 sequence of runtime tests is generated for each pairs of DDRs whose
7121 independence was not proven. The vectorized version of loop is
7122 executed only if both alias and alignment tests are passed. */
7124 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7125 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7128 tree cond_expr = NULL_TREE;
7129 tree cond_expr_stmt_list = NULL_TREE;
7130 basic_block condition_bb;
7131 block_stmt_iterator cond_exp_bsi;
7132 basic_block merge_bb;
7133 basic_block new_exit_bb;
7135 tree orig_phi, new_phi, arg;
/* Probability handed to loop_version below: 4/5 of REG_BR_PROB_BASE.  */
7136 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
7137 tree gimplify_stmt_list;
/* The alignment condition is built first; the alias checks receive the
   address of COND_EXPR so that they can chain onto it.  */
7139 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
7141 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
7143 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7144 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr,
7145 &cond_expr_stmt_list);
7148 fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
7150 force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
7152 append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
/* The original/copy tables are set up only for the duration of the
   loop_version call.  */
7154 initialize_original_copy_tables ();
7155 nloop = loop_version (loop, cond_expr, &condition_bb,
7156 prob, prob, REG_BR_PROB_BASE - prob, true);
7157 free_original_copy_tables();
7159 /** Loop versioning violates an assumption we try to maintain during
7160 vectorization - that the loop exit block has a single predecessor.
7161 After versioning, the exit block of both loop versions is the same
7162 basic block (i.e. it has two predecessors). Just in order to simplify
7163 following transformations in the vectorizer, we fix this situation
7164 here by adding a new (empty) block on the exit-edge of the loop,
7165 with the proper loop-exit phis to maintain loop-closed-form. **/
7167 merge_bb = single_exit (loop)->dest;
7168 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
7169 new_exit_bb = split_edge (single_exit (loop));
7170 new_exit_e = single_exit (loop);
7171 e = EDGE_SUCC (new_exit_bb, 0);
/* For every phi of MERGE_BB, create a phi in the new exit block that takes
   over the argument arriving on edge E, and make the original phi use the
   new phi's result on that edge instead.  */
7173 for (orig_phi = phi_nodes (merge_bb); orig_phi;
7174 orig_phi = PHI_CHAIN (orig_phi))
7176 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
7178 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
7179 add_phi_arg (new_phi, arg, new_exit_e);
7180 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
7183 /** end loop-exit-fixes after versioning **/
/* Update SSA form, then insert the statements that compute the versioning
   condition before the last statement of CONDITION_BB.  */
7185 update_ssa (TODO_update_ssa);
7186 cond_exp_bsi = bsi_last (condition_bb);
7187 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
7190 /* CHECKME: we wouldn't need this if we called update_ssa once
7192 bitmap_zero (vect_memsyms_to_rename);
7194 /* Peel the loop if there are data refs with unknown alignment.
7195 Only one data ref with unknown store is allowed. */
7197 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
7198 vect_do_peeling_for_alignment (loop_vinfo);
7200 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
7201 compile time constant), or it is a constant that doesn't divide by the
7202 vectorization factor, then an epilog loop needs to be created.
7203 We therefore duplicate the loop: the original loop will be vectorized,
7204 and will compute the first (n/VF) iterations. The second copy of the loop
7205 will remain scalar and will compute the remaining (n%VF) iterations.
7206 (VF is the vectorization factor). */
7208 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7209 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7210 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
7211 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
/* Remaining case (iteration count known and divisible by the vectorization
   factor): RATIO is built directly as an integer constant.  */
7213 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
7214 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
7216 /* 1) Make sure the loop header has exactly two entries
7217 2) Make sure we have a preheader basic block. */
7219 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7221 split_edge (loop_preheader_edge (loop));
7223 /* FORNOW: the vectorizer supports only loops which body consist
7224 of one basic block (header + empty latch). When the vectorizer will
7225 support more involved loop forms, the order by which the BBs are
7226 traversed need to be reconsidered. */
7228 for (i = 0; i < nbbs; i++)
7230 basic_block bb = bbs[i];
7231 stmt_vec_info stmt_info;
/* First pass over BB: its phi nodes.  */
7234 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
7236 if (vect_print_dump_info (REPORT_DETAILS))
7238 fprintf (vect_dump, "------>vectorizing phi: ");
7239 print_generic_expr (vect_dump, phi, TDF_SLIM);
7241 stmt_info = vinfo_for_stmt (phi);
7245 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7246 && !STMT_VINFO_LIVE_P (stmt_info))
7249 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7250 != (unsigned HOST_WIDE_INT) vectorization_factor)
7251 && vect_print_dump_info (REPORT_DETAILS))
7252 fprintf (vect_dump, "multiple-types.");
/* Only phis whose def type is vect_induction_def are transformed here.  */
7254 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
7256 if (vect_print_dump_info (REPORT_DETAILS))
7257 fprintf (vect_dump, "transform phi.");
7258 vect_transform_stmt (phi, NULL, NULL, NULL);
/* Second pass over BB: its statements.  The for header has no increment
   expression; the iterator SI is updated by statements in the body.  */
7262 for (si = bsi_start (bb); !bsi_end_p (si);)
7264 tree stmt = bsi_stmt (si);
7267 if (vect_print_dump_info (REPORT_DETAILS))
7269 fprintf (vect_dump, "------>vectorizing statement: ");
7270 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7273 stmt_info = vinfo_for_stmt (stmt);
7275 /* vector stmts created in the outer-loop during vectorization of
7276 stmts in an inner-loop may not have a stmt_info, and do not
7277 need to be vectorized. */
7284 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7285 && !STMT_VINFO_LIVE_P (stmt_info))
7291 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
7293 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7294 if (!STMT_SLP_TYPE (stmt_info)
7295 && nunits != (unsigned int) vectorization_factor
7296 && vect_print_dump_info (REPORT_DETAILS))
7297 /* For SLP VF is set according to unrolling factor, and not to
7298 vector size, hence for SLP this print is not valid. */
7299 fprintf (vect_dump, "multiple-types.");
7301 /* SLP. Schedule all the SLP instances when the first SLP stmt is
7303 if (STMT_SLP_TYPE (stmt_info))
7307 slp_scheduled = true;
7309 if (vect_print_dump_info (REPORT_DETAILS))
7310 fprintf (vect_dump, "=== scheduling SLP instances ===");
7312 is_store = vect_schedule_slp (loop_vinfo, nunits);
7314 /* IS_STORE is true if STMT is a store. Stores cannot be of
7315 hybrid SLP type. They are removed in
7316 vect_schedule_slp_instance and their vinfo is destroyed. */
7324 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
7325 if (PURE_SLP_STMT (stmt_info))
7332 /* -------- vectorize statement ------------ */
7333 if (vect_print_dump_info (REPORT_DETAILS))
7334 fprintf (vect_dump, "transform statement.");
/* STRIDED_STORE is cleared for every statement and passed by address to
   vect_transform_stmt.  */
7336 strided_store = false;
7337 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
7341 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
7343 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7344 interleaving chain was completed - free all the stores in
7346 tree next = DR_GROUP_FIRST_DR (stmt_info);
7348 stmt_vec_info next_stmt_info;
7352 next_si = bsi_for_stmt (next);
7353 next_stmt_info = vinfo_for_stmt (next);
7354 /* Free the attached stmt_vec_info and remove the stmt. */
7355 ann = stmt_ann (next);
7356 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7357 free (next_stmt_info);
7358 set_stmt_info (ann, NULL);
7359 bsi_remove (&next_si, true);
7362 bsi_remove (&si, true);
7367 /* Free the attached stmt_vec_info and remove the stmt. */
7368 ann = stmt_ann (stmt);
7370 set_stmt_info (ann, NULL);
7371 bsi_remove (&si, true);
/* Rewrite the exit condition so that the transformed loop iterates RATIO
   times.  */
7379 slpeel_make_loop_iterate_ntimes (loop, ratio);
7381 mark_set_for_renaming (vect_memsyms_to_rename);
7383 /* The memory tags and pointers in vectorized statements need to
7384 have their SSA forms updated. FIXME, why can't this be delayed
7385 until all the loops have been transformed? */
7386 update_ssa (TODO_update_ssa);
7388 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7389 fprintf (vect_dump, "LOOP VECTORIZED.");
/* An additional message is printed when LOOP contains an inner loop.  */
7390 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7391 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");