1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (tree, block_stmt_iterator *, bool *, slp_tree);
50 static tree vect_create_destination_var (tree, tree);
51 static tree vect_create_data_ref_ptr
52 (tree, struct loop*, tree, tree *, tree *, bool, tree, bool *);
53 static tree vect_create_addr_base_for_vector_ref
54 (tree, tree *, tree, struct loop *);
55 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
56 static tree vect_get_vec_def_for_operand (tree, tree, tree *);
57 static tree vect_init_vector (tree, tree, tree, block_stmt_iterator *);
58 static void vect_finish_stmt_generation
59 (tree stmt, tree vec_stmt, block_stmt_iterator *);
60 static bool vect_is_simple_cond (tree, loop_vec_info);
61 static void vect_create_epilog_for_reduction (tree, tree, enum tree_code, tree);
62 static tree get_initial_def_for_reduction (tree, tree, tree *);
64 /* Utility function dealing with loop peeling (not peeling itself). */
65 static void vect_generate_tmps_on_preheader
66 (loop_vec_info, tree *, tree *, tree *);
67 static tree vect_build_loop_niters (loop_vec_info);
68 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
69 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
70 static void vect_update_init_of_dr (struct data_reference *, tree niters);
71 static void vect_update_inits_of_drs (loop_vec_info, tree);
72 static int vect_min_worthwhile_factor (enum tree_code);
76 cost_for_stmt (tree stmt)
78 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
80 switch (STMT_VINFO_TYPE (stmt_info))
82 case load_vec_info_type:
83 return TARG_SCALAR_LOAD_COST;
84 case store_vec_info_type:
85 return TARG_SCALAR_STORE_COST;
86 case op_vec_info_type:
87 case condition_vec_info_type:
88 case assignment_vec_info_type:
89 case reduc_vec_info_type:
90 case induc_vec_info_type:
91 case type_promotion_vec_info_type:
92 case type_demotion_vec_info_type:
93 case type_conversion_vec_info_type:
94 case call_vec_info_type:
95 return TARG_SCALAR_STMT_COST;
96 case undef_vec_info_type:
103 /* Function vect_estimate_min_profitable_iters
105 Return the number of iterations required for the vector version of the
106 loop to be profitable relative to the cost of the scalar version of the
109 TODO: Take profile info into account before making vectorization
110 decisions, if available. */
113 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
116 int min_profitable_iters;
117 int peel_iters_prologue;
118 int peel_iters_epilogue;
119 int vec_inside_cost = 0;
120 int vec_outside_cost = 0;
121 int scalar_single_iter_cost = 0;
122 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
124 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
125 int nbbs = loop->num_nodes;
127 int peel_guard_costs = 0;
128 int innerloop_iters = 0, factor;
129 VEC (slp_instance, heap) *slp_instances;
130 slp_instance instance;
132 /* Cost model disabled. */
133 if (!flag_vect_cost_model)
135 if (vect_print_dump_info (REPORT_DETAILS))
136 fprintf (vect_dump, "cost model disabled.");
140 /* Requires loop versioning tests to handle misalignment. */
142 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
144 /* FIXME: Make cost depend on complexity of individual check. */
146 VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
147 if (vect_print_dump_info (REPORT_DETAILS))
148 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
149 "versioning to treat misalignment.\n");
152 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
154 /* FIXME: Make cost depend on complexity of individual check. */
156 VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
157 if (vect_print_dump_info (REPORT_DETAILS))
158 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
159 "versioning aliasing.\n");
162 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
163 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
165 vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
168 /* Count statements in scalar loop. Using this as scalar cost for a single
171 TODO: Add outer loop support.
173 TODO: Consider assigning different costs to different scalar
178 innerloop_iters = 50; /* FIXME */
180 for (i = 0; i < nbbs; i++)
182 block_stmt_iterator si;
183 basic_block bb = bbs[i];
185 if (bb->loop_father == loop->inner)
186 factor = innerloop_iters;
190 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
192 tree stmt = bsi_stmt (si);
193 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
194 if (!STMT_VINFO_RELEVANT_P (stmt_info)
195 && !STMT_VINFO_LIVE_P (stmt_info))
197 scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
198 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
199 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
200 some of the "outside" costs are generated inside the outer-loop. */
201 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
205 /* Add additional cost for the peeled instructions in prologue and epilogue
208 FORNOW: If we dont know the value of peel_iters for prologue or epilogue
209 at compile-time - we assume it's vf/2 (the worst would be vf-1).
211 TODO: Build an expression that represents peel_iters for prologue and
212 epilogue to be used in a run-time test. */
214 byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
216 if (byte_misalign < 0)
218 peel_iters_prologue = vf/2;
219 if (vect_print_dump_info (REPORT_DETAILS))
220 fprintf (vect_dump, "cost model: "
221 "prologue peel iters set to vf/2.");
223 /* If peeling for alignment is unknown, loop bound of main loop becomes
225 peel_iters_epilogue = vf/2;
226 if (vect_print_dump_info (REPORT_DETAILS))
227 fprintf (vect_dump, "cost model: "
228 "epilogue peel iters set to vf/2 because "
229 "peeling for alignment is unknown .");
231 /* If peeled iterations are unknown, count a taken branch and a not taken
232 branch per peeled loop. Even if scalar loop iterations are known,
233 vector iterations are not known since peeled prologue iterations are
234 not known. Hence guards remain the same. */
235 peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
236 + TARG_COND_NOT_TAKEN_BRANCH_COST);
243 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
244 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
245 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
246 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
248 peel_iters_prologue = nelements - (byte_misalign / element_size);
251 peel_iters_prologue = 0;
253 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
255 peel_iters_epilogue = vf/2;
256 if (vect_print_dump_info (REPORT_DETAILS))
257 fprintf (vect_dump, "cost model: "
258 "epilogue peel iters set to vf/2 because "
259 "loop iterations are unknown .");
261 /* If peeled iterations are known but number of scalar loop
262 iterations are unknown, count a taken branch per peeled loop. */
263 peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
268 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
269 peel_iters_prologue = niters < peel_iters_prologue ?
270 niters : peel_iters_prologue;
271 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
275 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
276 + (peel_iters_epilogue * scalar_single_iter_cost)
279 /* Allow targets add additional (outside-of-loop) costs. FORNOW, the only
280 information we provide for the target is whether testing against the
281 threshold involves a runtime test. */
282 if (targetm.vectorize.builtin_vectorization_cost)
284 bool runtime_test = false;
286 /* If the number of iterations is unknown, or the
287 peeling-for-misalignment amount is unknown, we eill have to generate
288 a runtime test to test the loop count against the threshold. */
289 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
290 || (byte_misalign < 0))
293 targetm.vectorize.builtin_vectorization_cost (runtime_test);
294 if (vect_print_dump_info (REPORT_DETAILS))
295 fprintf (vect_dump, "cost model : Adding target out-of-loop cost = %d",
296 targetm.vectorize.builtin_vectorization_cost (runtime_test));
300 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
301 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
303 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
304 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
307 /* Calculate number of iterations required to make the vector version
308 profitable, relative to the loop bodies only. The following condition
309 must hold true: ((SIC*VF)-VIC)*niters > VOC*VF, where
310 SIC = scalar iteration cost, VIC = vector iteration cost,
311 VOC = vector outside cost and VF = vectorization factor. */
313 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
315 if (vec_outside_cost <= 0)
316 min_profitable_iters = 1;
319 min_profitable_iters = (vec_outside_cost * vf
320 - vec_inside_cost * peel_iters_prologue
321 - vec_inside_cost * peel_iters_epilogue)
322 / ((scalar_single_iter_cost * vf)
325 if ((scalar_single_iter_cost * vf * min_profitable_iters)
326 <= ((vec_inside_cost * min_profitable_iters)
327 + (vec_outside_cost * vf)))
328 min_profitable_iters++;
331 /* vector version will never be profitable. */
334 if (vect_print_dump_info (REPORT_DETAILS))
335 fprintf (vect_dump, "cost model: vector iteration cost = %d "
336 "is divisible by scalar iteration cost = %d by a factor "
337 "greater than or equal to the vectorization factor = %d .",
338 vec_inside_cost, scalar_single_iter_cost, vf);
342 if (vect_print_dump_info (REPORT_DETAILS))
344 fprintf (vect_dump, "Cost model analysis: \n");
345 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
347 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
349 fprintf (vect_dump, " Scalar cost: %d\n", scalar_single_iter_cost);
350 fprintf (vect_dump, " prologue iterations: %d\n",
351 peel_iters_prologue);
352 fprintf (vect_dump, " epilogue iterations: %d\n",
353 peel_iters_epilogue);
354 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
355 min_profitable_iters);
358 min_profitable_iters =
359 min_profitable_iters < vf ? vf : min_profitable_iters;
361 /* Because the condition we create is:
362 if (niters <= min_profitable_iters)
363 then skip the vectorized loop. */
364 min_profitable_iters--;
366 if (vect_print_dump_info (REPORT_DETAILS))
367 fprintf (vect_dump, " Profitability threshold = %d\n",
368 min_profitable_iters);
370 return min_profitable_iters;
374 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
375 functions. Design better to avoid maintenance issues. */
377 /* Function vect_model_reduction_cost.
379 Models cost for a reduction operation, including the vector ops
380 generated within the strip-mine loop, the initial definition before
381 the loop, and the epilogue code that must be generated. */
384 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
393 enum machine_mode mode;
394 tree operation = GIMPLE_STMT_OPERAND (STMT_VINFO_STMT (stmt_info), 1);
395 int op_type = TREE_CODE_LENGTH (TREE_CODE (operation));
396 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
397 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
399 /* Cost of reduction op inside loop. */
400 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
402 reduction_op = TREE_OPERAND (operation, op_type-1);
403 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
406 if (vect_print_dump_info (REPORT_DETAILS))
408 fprintf (vect_dump, "unsupported data-type ");
409 print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
414 mode = TYPE_MODE (vectype);
415 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
418 orig_stmt = STMT_VINFO_STMT (stmt_info);
420 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
422 /* Add in cost for initial definition. */
423 outer_cost += TARG_SCALAR_TO_VEC_COST;
425 /* Determine cost of epilogue code.
427 We have a reduction operator that will reduce the vector in one statement.
428 Also requires scalar extract. */
430 if (!nested_in_vect_loop_p (loop, orig_stmt))
432 if (reduc_code < NUM_TREE_CODES)
433 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
436 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
438 TYPE_SIZE (TREE_TYPE ( GIMPLE_STMT_OPERAND (orig_stmt, 0)));
439 int element_bitsize = tree_low_cst (bitsize, 1);
440 int nelements = vec_size_in_bits / element_bitsize;
442 optab = optab_for_tree_code (code, vectype);
444 /* We have a whole vector shift available. */
445 if (VECTOR_MODE_P (mode)
446 && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
447 && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
448 /* Final reduction via vector shifts and the reduction operator. Also
449 requires scalar extract. */
450 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
451 + TARG_VEC_TO_SCALAR_COST);
453 /* Use extracts and reduction op for final reduction. For N elements,
454 we have N extracts and N-1 reduction ops. */
455 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
459 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
461 if (vect_print_dump_info (REPORT_DETAILS))
462 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
463 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
464 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
470 /* Function vect_model_induction_cost.
472 Models cost for induction operations. */
475 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
477 /* loop cost for vec_loop. */
478 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
479 /* prologue cost for vec_init and vec_step. */
480 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
482 if (vect_print_dump_info (REPORT_DETAILS))
483 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
484 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
485 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
489 /* Function vect_model_simple_cost.
491 Models cost for simple operations, i.e. those that only emit ncopies of a
492 single op. Right now, this does not account for multiple insns that could
493 be generated for the single vector op. We will handle that shortly. */
496 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
497 enum vect_def_type *dt, slp_tree slp_node)
500 int inside_cost = 0, outside_cost = 0;
502 inside_cost = ncopies * TARG_VEC_STMT_COST;
504 /* FORNOW: Assuming maximum 2 args per stmts. */
505 for (i = 0; i < 2; i++)
507 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
508 outside_cost += TARG_SCALAR_TO_VEC_COST;
511 if (vect_print_dump_info (REPORT_DETAILS))
512 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
513 "outside_cost = %d .", inside_cost, outside_cost);
515 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
516 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
517 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
521 /* Function vect_cost_strided_group_size
523 For strided load or store, return the group_size only if it is the first
524 load or store of a group, else return 1. This ensures that group size is
525 only returned once per group. */
528 vect_cost_strided_group_size (stmt_vec_info stmt_info)
530 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
532 if (first_stmt == STMT_VINFO_STMT (stmt_info))
533 return DR_GROUP_SIZE (stmt_info);
539 /* Function vect_model_store_cost
541 Models cost for stores. In the case of strided accesses, one access
542 has the overhead of the strided access attributed to it. */
545 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
546 enum vect_def_type dt, slp_tree slp_node)
549 int inside_cost = 0, outside_cost = 0;
551 if (dt == vect_constant_def || dt == vect_invariant_def)
552 outside_cost = TARG_SCALAR_TO_VEC_COST;
554 /* Strided access? */
555 if (DR_GROUP_FIRST_DR (stmt_info))
556 group_size = vect_cost_strided_group_size (stmt_info);
557 /* Not a strided access. */
561 /* Is this an access in a group of stores, which provide strided access?
562 If so, add in the cost of the permutes. */
565 /* Uses a high and low interleave operation for each needed permute. */
566 inside_cost = ncopies * exact_log2(group_size) * group_size
567 * TARG_VEC_STMT_COST;
569 if (vect_print_dump_info (REPORT_DETAILS))
570 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
575 /* Costs of the stores. */
576 inside_cost += ncopies * TARG_VEC_STORE_COST;
578 if (vect_print_dump_info (REPORT_DETAILS))
579 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
580 "outside_cost = %d .", inside_cost, outside_cost);
582 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
583 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
584 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
588 /* Function vect_model_load_cost
590 Models cost for loads. In the case of strided accesses, the last access
591 has the overhead of the strided access attributed to it. Since unaligned
592 accesses are supported for loads, we also account for the costs of the
593 access scheme chosen. */
596 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
600 int alignment_support_cheme;
602 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
603 int inside_cost = 0, outside_cost = 0;
605 /* Strided accesses? */
606 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
607 if (first_stmt && !slp_node)
609 group_size = vect_cost_strided_group_size (stmt_info);
610 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
612 /* Not a strided access. */
619 alignment_support_cheme = vect_supportable_dr_alignment (first_dr);
621 /* Is this an access in a group of loads providing strided access?
622 If so, add in the cost of the permutes. */
625 /* Uses an even and odd extract operations for each needed permute. */
626 inside_cost = ncopies * exact_log2(group_size) * group_size
627 * TARG_VEC_STMT_COST;
629 if (vect_print_dump_info (REPORT_DETAILS))
630 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
635 /* The loads themselves. */
636 switch (alignment_support_cheme)
640 inside_cost += ncopies * TARG_VEC_LOAD_COST;
642 if (vect_print_dump_info (REPORT_DETAILS))
643 fprintf (vect_dump, "vect_model_load_cost: aligned.");
647 case dr_unaligned_supported:
649 /* Here, we assign an additional cost for the unaligned load. */
650 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
652 if (vect_print_dump_info (REPORT_DETAILS))
653 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by "
658 case dr_explicit_realign:
660 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
662 /* FIXME: If the misalignment remains fixed across the iterations of
663 the containing loop, the following cost should be added to the
665 if (targetm.vectorize.builtin_mask_for_load)
666 inside_cost += TARG_VEC_STMT_COST;
670 case dr_explicit_realign_optimized:
672 if (vect_print_dump_info (REPORT_DETAILS))
673 fprintf (vect_dump, "vect_model_load_cost: unaligned software "
676 /* Unaligned software pipeline has a load of an address, an initial
677 load, and possibly a mask operation to "prime" the loop. However,
678 if this is an access in a group of loads, which provide strided
679 access, then the above cost should only be considered for one
680 access in the group. Inside the loop, there is a load op
681 and a realignment op. */
683 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
685 outside_cost = 2*TARG_VEC_STMT_COST;
686 if (targetm.vectorize.builtin_mask_for_load)
687 outside_cost += TARG_VEC_STMT_COST;
690 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
699 if (vect_print_dump_info (REPORT_DETAILS))
700 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
701 "outside_cost = %d .", inside_cost, outside_cost);
703 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
704 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
705 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
709 /* Function vect_get_new_vect_var.
711 Returns a name for a new variable. The current naming scheme appends the
712 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
713 the name of vectorizer generated variables, and appends that to NAME if
717 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
724 case vect_simple_var:
727 case vect_scalar_var:
730 case vect_pointer_var:
739 char* tmp = concat (prefix, name, NULL);
740 new_vect_var = create_tmp_var (type, tmp);
744 new_vect_var = create_tmp_var (type, prefix);
746 /* Mark vector typed variable as a gimple register variable. */
747 if (TREE_CODE (type) == VECTOR_TYPE)
748 DECL_GIMPLE_REG_P (new_vect_var) = true;
754 /* Function vect_create_addr_base_for_vector_ref.
756 Create an expression that computes the address of the first memory location
757 that will be accessed for a data reference.
760 STMT: The statement containing the data reference.
761 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
762 OFFSET: Optional. If supplied, it is be added to the initial address.
763 LOOP: Specify relative to which loop-nest should the address be computed.
764 For example, when the dataref is in an inner-loop nested in an
765 outer-loop that is now being vectorized, LOOP can be either the
766 outer-loop, or the inner-loop. The first memory location accessed
767 by the following dataref ('in' points to short):
774 if LOOP=i_loop: &in (relative to i_loop)
775 if LOOP=j_loop: &in+i*2B (relative to j_loop)
778 1. Return an SSA_NAME whose value is the address of the memory location of
779 the first vector of the data reference.
780 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
781 these statement(s) which define the returned SSA_NAME.
783 FORNOW: We are only handling array accesses with step 1. */
786 vect_create_addr_base_for_vector_ref (tree stmt,
791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
792 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
793 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
794 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
796 tree data_ref_base_var;
799 tree addr_base, addr_expr;
801 tree base_offset = unshare_expr (DR_OFFSET (dr));
802 tree init = unshare_expr (DR_INIT (dr));
803 tree vect_ptr_type, addr_expr2;
804 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
807 if (loop != containing_loop)
809 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
810 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
812 gcc_assert (nested_in_vect_loop_p (loop, stmt));
814 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
815 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
816 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
819 /* Create data_ref_base */
820 base_name = build_fold_indirect_ref (data_ref_base);
821 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
822 add_referenced_var (data_ref_base_var);
823 data_ref_base = force_gimple_operand (data_ref_base, &new_base_stmt,
824 true, data_ref_base_var);
825 append_to_statement_list_force(new_base_stmt, new_stmt_list);
827 /* Create base_offset */
828 base_offset = size_binop (PLUS_EXPR, base_offset, init);
829 base_offset = fold_convert (sizetype, base_offset);
830 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
831 add_referenced_var (dest);
832 base_offset = force_gimple_operand (base_offset, &new_stmt, true, dest);
833 append_to_statement_list_force (new_stmt, new_stmt_list);
837 tree tmp = create_tmp_var (sizetype, "offset");
839 add_referenced_var (tmp);
840 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
841 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
842 base_offset, offset);
843 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
844 append_to_statement_list_force (new_stmt, new_stmt_list);
847 /* base + base_offset */
848 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
849 data_ref_base, base_offset);
851 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
853 /* addr_expr = addr_base */
854 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
855 get_name (base_name));
856 add_referenced_var (addr_expr);
857 vec_stmt = fold_convert (vect_ptr_type, addr_base);
858 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
859 get_name (base_name));
860 add_referenced_var (addr_expr2);
861 vec_stmt = force_gimple_operand (vec_stmt, &new_stmt, false, addr_expr2);
862 append_to_statement_list_force (new_stmt, new_stmt_list);
864 if (vect_print_dump_info (REPORT_DETAILS))
866 fprintf (vect_dump, "created ");
867 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
873 /* Function vect_create_data_ref_ptr.
875 Create a new pointer to vector type (vp), that points to the first location
876 accessed in the loop by STMT, along with the def-use update chain to
877 appropriately advance the pointer through the loop iterations. Also set
878 aliasing information for the pointer. This vector pointer is used by the
879 callers to this function to create a memory reference expression for vector
883 1. STMT: a stmt that references memory. Expected to be of the form
884 GIMPLE_MODIFY_STMT <name, data-ref> or
885 GIMPLE_MODIFY_STMT <data-ref, name>.
886 2. AT_LOOP: the loop where the vector memref is to be created.
887 3. OFFSET (optional): an offset to be added to the initial address accessed
888 by the data-ref in STMT.
889 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
890 pointing to the initial address.
891 5. TYPE: if not NULL indicates the required type of the data-ref
894 1. Declare a new ptr to vector_type, and have it point to the base of the
895 data reference (initial addressed accessed by the data reference).
896 For example, for vector of type V8HI, the following code is generated:
899 vp = (v8hi *)initial_address;
901 if OFFSET is not supplied:
902 initial_address = &a[init];
903 if OFFSET is supplied:
904 initial_address = &a[init + OFFSET];
906 Return the initial_address in INITIAL_ADDRESS.
908 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
909 update the pointer in each iteration of the loop.
911 Return the increment stmt that updates the pointer in PTR_INCR.
913 3. Set INV_P to true if the access pattern of the data reference in the
914 vectorized loop is invariant. Set it to false otherwise.
916 4. Return the pointer. */
919 vect_create_data_ref_ptr (tree stmt, struct loop *at_loop,
920 tree offset, tree *initial_address, tree *ptr_incr,
921 bool only_init, tree type, bool *inv_p)
924 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
925 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
926 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
927 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
928 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
929 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
935 tree new_stmt_list = NULL_TREE;
939 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
941 block_stmt_iterator incr_bsi;
943 tree indx_before_incr, indx_after_incr;
947 /* Check the step (evolution) of the load in LOOP, and record
948 whether it's invariant. */
949 if (nested_in_vect_loop)
950 step = STMT_VINFO_DR_STEP (stmt_info);
952 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
954 if (tree_int_cst_compare (step, size_zero_node) == 0)
959 /* Create an expression for the first address accessed by this load
961 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
963 if (vect_print_dump_info (REPORT_DETAILS))
965 tree data_ref_base = base_name;
966 fprintf (vect_dump, "create vector-pointer variable to type: ");
967 print_generic_expr (vect_dump, vectype, TDF_SLIM);
968 if (TREE_CODE (data_ref_base) == VAR_DECL)
969 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
970 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
971 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
972 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
973 fprintf (vect_dump, " vectorizing a record based array ref: ");
974 else if (TREE_CODE (data_ref_base) == SSA_NAME)
975 fprintf (vect_dump, " vectorizing a pointer ref: ");
976 print_generic_expr (vect_dump, base_name, TDF_SLIM);
979 /** (1) Create the new vector-pointer variable: **/
981 vect_ptr_type = build_pointer_type (type);
983 vect_ptr_type = build_pointer_type (vectype);
984 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
985 get_name (base_name));
986 add_referenced_var (vect_ptr);
988 /** (2) Add aliasing information to the new vector-pointer:
989 (The points-to info (DR_PTR_INFO) may be defined later.) **/
991 tag = DR_SYMBOL_TAG (dr);
994 /* If tag is a variable (and NOT_A_TAG) than a new symbol memory
995 tag must be created with tag added to its may alias list. */
997 new_type_alias (vect_ptr, tag, DR_REF (dr));
999 set_symbol_mem_tag (vect_ptr, tag);
1001 var_ann (vect_ptr)->subvars = DR_SUBVARS (dr);
1003 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
1004 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
1005 def-use update cycles for the pointer: One relative to the outer-loop
1006 (LOOP), which is what steps (3) and (4) below do. The other is relative
1007 to the inner-loop (which is the inner-most loop containing the dataref),
1008 and this is done be step (5) below.
1010 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
1011 inner-most loop, and so steps (3),(4) work the same, and step (5) is
1012 redundant. Steps (3),(4) create the following:
1015 LOOP: vp1 = phi(vp0,vp2)
1021 If there is an inner-loop nested in loop, then step (5) will also be
1022 applied, and an additional update in the inner-loop will be created:
1025 LOOP: vp1 = phi(vp0,vp2)
1027 inner: vp3 = phi(vp1,vp4)
1028 vp4 = vp3 + inner_step
1034 /** (3) Calculate the initial address the vector-pointer, and set
1035 the vector-pointer to point to it before the loop: **/
1037 /* Create: (&(base[init_val+offset]) in the loop preheader. */
1039 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1041 pe = loop_preheader_edge (loop);
1042 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
1043 gcc_assert (!new_bb);
1044 *initial_address = new_temp;
1046 /* Create: p = (vectype *) initial_base */
1047 vec_stmt = fold_convert (vect_ptr_type, new_temp);
1048 vec_stmt = build_gimple_modify_stmt (vect_ptr, vec_stmt);
1049 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
1050 GIMPLE_STMT_OPERAND (vec_stmt, 0) = vect_ptr_init;
1051 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
1052 gcc_assert (!new_bb);
1055 /** (4) Handle the updating of the vector-pointer inside the loop.
1056 This is needed when ONLY_INIT is false, and also when AT_LOOP
1057 is the inner-loop nested in LOOP (during outer-loop vectorization).
1060 if (only_init && at_loop == loop) /* No update in loop is required. */
1062 /* Copy the points-to information if it exists. */
1063 if (DR_PTR_INFO (dr))
1064 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
1065 vptr = vect_ptr_init;
1069 /* The step of the vector pointer is the Vector Size. */
1070 tree step = TYPE_SIZE_UNIT (vectype);
1071 /* One exception to the above is when the scalar step of the load in
1072 LOOP is zero. In this case the step here is also zero. */
1074 step = size_zero_node;
1076 standard_iv_increment_position (loop, &incr_bsi, &insert_after);
1078 create_iv (vect_ptr_init,
1079 fold_convert (vect_ptr_type, step),
1080 NULL_TREE, loop, &incr_bsi, insert_after,
1081 &indx_before_incr, &indx_after_incr);
1082 incr = bsi_stmt (incr_bsi);
1083 set_stmt_info (stmt_ann (incr),
1084 new_stmt_vec_info (incr, loop_vinfo));
1086 /* Copy the points-to information if it exists. */
1087 if (DR_PTR_INFO (dr))
1089 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1090 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1092 merge_alias_info (vect_ptr_init, indx_before_incr);
1093 merge_alias_info (vect_ptr_init, indx_after_incr);
1097 vptr = indx_before_incr;
1100 if (!nested_in_vect_loop || only_init)
1104 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1105 nested in LOOP, if exists: **/
1107 gcc_assert (nested_in_vect_loop);
1110 standard_iv_increment_position (containing_loop, &incr_bsi,
1112 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
1113 containing_loop, &incr_bsi, insert_after, &indx_before_incr,
1115 incr = bsi_stmt (incr_bsi);
1116 set_stmt_info (stmt_ann (incr), new_stmt_vec_info (incr, loop_vinfo));
1118 /* Copy the points-to information if it exists. */
1119 if (DR_PTR_INFO (dr))
1121 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1122 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1124 merge_alias_info (vect_ptr_init, indx_before_incr);
1125 merge_alias_info (vect_ptr_init, indx_after_incr);
1129 return indx_before_incr;
1136 /* Function bump_vector_ptr
1138 Increment a pointer (to a vector type) by vector-size. If requested,
1139 i.e. if PTR-INCR is given, then also connect the new increment stmt
1140 to the existing def-use update-chain of the pointer, by modifying
1141 the PTR_INCR as illustrated below:
1143 The pointer def-use update-chain before this function:
1144 DATAREF_PTR = phi (p_0, p_2)
1146 PTR_INCR: p_2 = DATAREF_PTR + step
1148 The pointer def-use update-chain after this function:
1149 DATAREF_PTR = phi (p_0, p_2)
1151 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1153 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1156 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
1158 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1159 the loop. The increment amount across iterations is expected
1161 BSI - location where the new update stmt is to be placed.
1162 STMT - the original scalar memory-access stmt that is being vectorized.
1163 BUMP - optional. The offset by which to bump the pointer. If not given,
1164 the offset is assumed to be vector_size.
1166 Output: Return NEW_DATAREF_PTR as illustrated above.
1171 bump_vector_ptr (tree dataref_ptr, tree ptr_incr, block_stmt_iterator *bsi,
1172 tree stmt, tree bump)
1174 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1175 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1176 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1177 tree vptr_type = TREE_TYPE (dataref_ptr);
1178 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
1179 tree update = TYPE_SIZE_UNIT (vectype);
1182 use_operand_p use_p;
1183 tree new_dataref_ptr;
1188 incr_stmt = build_gimple_modify_stmt (ptr_var,
1189 build2 (POINTER_PLUS_EXPR, vptr_type,
1190 dataref_ptr, update));
1191 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1192 GIMPLE_STMT_OPERAND (incr_stmt, 0) = new_dataref_ptr;
1193 vect_finish_stmt_generation (stmt, incr_stmt, bsi);
1195 /* Copy the points-to information if it exists. */
1196 if (DR_PTR_INFO (dr))
1197 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1198 merge_alias_info (new_dataref_ptr, dataref_ptr);
1201 return new_dataref_ptr;
1203 /* Update the vector-pointer's cross-iteration increment. */
1204 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1206 tree use = USE_FROM_PTR (use_p);
1208 if (use == dataref_ptr)
1209 SET_USE (use_p, new_dataref_ptr);
1211 gcc_assert (tree_int_cst_compare (use, update) == 0);
1214 return new_dataref_ptr;
1218 /* Function vect_create_destination_var.
1220 Create a new temporary of type VECTYPE. */
1223 vect_create_destination_var (tree scalar_dest, tree vectype)
1226 const char *new_name;
1228 enum vect_var_kind kind;
1230 kind = vectype ? vect_simple_var : vect_scalar_var;
1231 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1233 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1235 new_name = get_name (scalar_dest);
1238 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1239 add_referenced_var (vec_dest);
1245 /* Function vect_init_vector.
1247 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1248 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1249 is not NULL. Otherwise, place the initialization at the loop preheader.
1250 Return the DEF of INIT_STMT.
1251 It will be used in the vectorization of STMT. */
1254 vect_init_vector (tree stmt, tree vector_var, tree vector_type,
1255 block_stmt_iterator *bsi)
1257 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1265 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1266 add_referenced_var (new_var);
1267 init_stmt = build_gimple_modify_stmt (new_var, vector_var);
1268 new_temp = make_ssa_name (new_var, init_stmt);
1269 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_temp;
1272 vect_finish_stmt_generation (stmt, init_stmt, bsi);
1275 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1276 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1278 if (nested_in_vect_loop_p (loop, stmt))
1280 pe = loop_preheader_edge (loop);
1281 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1282 gcc_assert (!new_bb);
1285 if (vect_print_dump_info (REPORT_DETAILS))
1287 fprintf (vect_dump, "created new init_stmt: ");
1288 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1291 vec_oprnd = GIMPLE_STMT_OPERAND (init_stmt, 0);
1296 /* For constant and loop invariant defs of SLP_NODE this function returns
1297 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1298 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1302 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1303 unsigned int op_num)
1305 VEC (tree, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1306 tree stmt = VEC_index (tree, stmts, 0);
1307 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1308 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1309 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1312 int j, number_of_places_left_in_vector;
1314 tree op, vop, operation;
1315 int group_size = VEC_length (tree, stmts);
1316 unsigned int vec_num, i;
1317 int number_of_copies = 1;
1318 bool is_store = false;
1319 unsigned int number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1320 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1323 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1326 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1327 created vectors. It is greater than 1 if unrolling is performed.
1329 For example, we have two scalar operands, s1 and s2 (e.g., group of
1330 strided accesses of size two), while NUINTS is four (i.e., four scalars
1331 of this type can be packed in a vector). The output vector will contain
1332 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
1335 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1336 containing the operands.
1338 For example, NUINTS is four as before, and the group size is 8
1339 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1340 {s5, s6, s7, s8}. */
1342 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1344 number_of_places_left_in_vector = nunits;
1346 for (j = 0; j < number_of_copies; j++)
1348 for (i = group_size - 1; VEC_iterate (tree, stmts, i, stmt); i--)
1350 operation = GIMPLE_STMT_OPERAND (stmt, 1);
1354 op = TREE_OPERAND (operation, op_num);
1355 if (!CONSTANT_CLASS_P (op))
1358 /* Create 'vect_ = {op0,op1,...,opn}'. */
1359 t = tree_cons (NULL_TREE, op, t);
1361 number_of_places_left_in_vector--;
1363 if (number_of_places_left_in_vector == 0)
1365 number_of_places_left_in_vector = nunits;
1367 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1368 gcc_assert (vector_type);
1370 vec_cst = build_vector (vector_type, t);
1372 vec_cst = build_constructor_from_list (vector_type, t);
1374 VEC_quick_push (tree, voprnds,
1375 vect_init_vector (stmt, vec_cst, vector_type,
1382 /* Since the vectors are created in the reverse order, we should invert
1384 vec_num = VEC_length (tree, voprnds);
1385 for (j = vec_num - 1; j >= 0; j--)
1387 vop = VEC_index (tree, voprnds, j);
1388 VEC_quick_push (tree, *vec_oprnds, vop);
1391 VEC_free (tree, heap, voprnds);
1393 /* In case that VF is greater than the unrolling factor needed for the SLP
1394 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1395 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1396 to replicate the vectors. */
1397 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1399 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1400 VEC_quick_push (tree, *vec_oprnds, vop);
1405 /* Get vectorized definitions from SLP_NODE that contains corresponding
1406 vectorized def-stmts. */
1409 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1415 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1418 VEC_iterate (tree, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1421 gcc_assert (vec_def_stmt);
1422 vec_oprnd = GIMPLE_STMT_OPERAND (vec_def_stmt, 0);
1423 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1428 /* Get vectorized definitions for SLP_NODE.
1429 If the scalar definitions are loop invariants or constants, collect them and
1430 call vect_get_constant_vectors() to create vector stmts.
1431 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1432 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1433 vect_get_slp_vect_defs() to retrieve them.
1434 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1435 the right node. This is used when the second operand must remain scalar. */
1438 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1439 VEC (tree,heap) **vec_oprnds1)
1441 tree operation, first_stmt;
1443 /* Allocate memory for vectorized defs. */
1444 *vec_oprnds0 = VEC_alloc (tree, heap,
1445 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1447 /* SLP_NODE corresponds either to a group of stores or to a group of
1448 unary/binary operations. We don't call this function for loads. */
1449 if (SLP_TREE_LEFT (slp_node))
1450 /* The defs are already vectorized. */
1451 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1453 /* Build vectors from scalar defs. */
1454 vect_get_constant_vectors (slp_node, vec_oprnds0, 0);
1456 first_stmt = VEC_index (tree, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1457 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1458 /* Since we don't call this function with loads, this is a group of
1462 operation = GIMPLE_STMT_OPERAND (first_stmt, 1);
1463 if (TREE_OPERAND_LENGTH (operation) == unary_op || !vec_oprnds1)
1466 *vec_oprnds1 = VEC_alloc (tree, heap,
1467 SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node));
1469 if (SLP_TREE_RIGHT (slp_node))
1470 /* The defs are already vectorized. */
1471 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1473 /* Build vectors from scalar defs. */
1474 vect_get_constant_vectors (slp_node, vec_oprnds1, 1);
1478 /* Function get_initial_def_for_induction
1481 STMT - a stmt that performs an induction operation in the loop.
1482 IV_PHI - the initial value of the induction variable
1485 Return a vector variable, initialized with the first VF values of
1486 the induction variable. E.g., for an iv with IV_PHI='X' and
1487 evolution S, for a vector of 4 units, we want to return:
1488 [X, X + S, X + 2*S, X + 3*S]. */
1491 get_initial_def_for_induction (tree iv_phi)
1493 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1494 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1495 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1496 tree scalar_type = TREE_TYPE (PHI_RESULT_TREE (iv_phi));
1499 edge pe = loop_preheader_edge (loop);
1500 struct loop *iv_loop;
1502 tree vec, vec_init, vec_step, t;
1507 tree induction_phi, induc_def, new_stmt, vec_def, vec_dest;
1508 tree init_expr, step_expr;
1509 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1514 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1515 bool nested_in_vect_loop = false;
1517 imm_use_iterator imm_iter;
1518 use_operand_p use_p;
1522 block_stmt_iterator si;
1523 basic_block bb = bb_for_stmt (iv_phi);
1525 vectype = get_vectype_for_scalar_type (scalar_type);
1526 gcc_assert (vectype);
1527 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1528 ncopies = vf / nunits;
1530 gcc_assert (phi_info);
1531 gcc_assert (ncopies >= 1);
1533 /* Find the first insertion point in the BB. */
1534 si = bsi_after_labels (bb);
1536 if (INTEGRAL_TYPE_P (scalar_type))
1537 step_expr = build_int_cst (scalar_type, 0);
1539 step_expr = build_real (scalar_type, dconst0);
1541 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1542 if (nested_in_vect_loop_p (loop, iv_phi))
1544 nested_in_vect_loop = true;
1545 iv_loop = loop->inner;
1549 gcc_assert (iv_loop == (bb_for_stmt (iv_phi))->loop_father);
1551 latch_e = loop_latch_edge (iv_loop);
1552 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
1554 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
1555 gcc_assert (access_fn);
1556 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
1557 &init_expr, &step_expr);
1559 pe = loop_preheader_edge (iv_loop);
1561 /* Create the vector that holds the initial_value of the induction. */
1562 if (nested_in_vect_loop)
1564 /* iv_loop is nested in the loop to be vectorized. init_expr had already
1565 been created during vectorization of previous stmts; We obtain it from
1566 the STMT_VINFO_VEC_STMT of the defining stmt. */
1567 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
1568 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
1572 /* iv_loop is the loop to be vectorized. Create:
1573 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1574 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1575 add_referenced_var (new_var);
1577 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1580 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
1581 gcc_assert (!new_bb);
1585 t = tree_cons (NULL_TREE, init_expr, t);
1586 for (i = 1; i < nunits; i++)
1590 /* Create: new_name_i = new_name + step_expr */
1591 tmp = fold_build2 (PLUS_EXPR, scalar_type, new_name, step_expr);
1592 init_stmt = build_gimple_modify_stmt (new_var, tmp);
1593 new_name = make_ssa_name (new_var, init_stmt);
1594 GIMPLE_STMT_OPERAND (init_stmt, 0) = new_name;
1596 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1597 gcc_assert (!new_bb);
1599 if (vect_print_dump_info (REPORT_DETAILS))
1601 fprintf (vect_dump, "created new init_stmt: ");
1602 print_generic_expr (vect_dump, init_stmt, TDF_SLIM);
1604 t = tree_cons (NULL_TREE, new_name, t);
1606 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
1607 vec = build_constructor_from_list (vectype, nreverse (t));
1608 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
1612 /* Create the vector that holds the step of the induction. */
1613 if (nested_in_vect_loop)
1614 /* iv_loop is nested in the loop to be vectorized. Generate:
1615 vec_step = [S, S, S, S] */
1616 new_name = step_expr;
1619 /* iv_loop is the loop to be vectorized. Generate:
1620 vec_step = [VF*S, VF*S, VF*S, VF*S] */
1621 expr = build_int_cst (scalar_type, vf);
1622 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1626 for (i = 0; i < nunits; i++)
1627 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1628 gcc_assert (CONSTANT_CLASS_P (new_name));
1629 vec = build_vector (vectype, t);
1630 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1633 /* Create the following def-use cycle:
1638 vec_iv = PHI <vec_init, vec_loop>
1642 vec_loop = vec_iv + vec_step; */
1644 /* Create the induction-phi that defines the induction-operand. */
1645 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1646 add_referenced_var (vec_dest);
1647 induction_phi = create_phi_node (vec_dest, iv_loop->header);
1648 set_stmt_info (get_stmt_ann (induction_phi),
1649 new_stmt_vec_info (induction_phi, loop_vinfo));
1650 induc_def = PHI_RESULT (induction_phi);
1652 /* Create the iv update inside the loop */
1653 new_stmt = build_gimple_modify_stmt (NULL_TREE,
1654 build2 (PLUS_EXPR, vectype,
1655 induc_def, vec_step));
1656 vec_def = make_ssa_name (vec_dest, new_stmt);
1657 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1658 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1659 set_stmt_info (get_stmt_ann (new_stmt),
1660 new_stmt_vec_info (new_stmt, loop_vinfo));
1662 /* Set the arguments of the phi node: */
1663 add_phi_arg (induction_phi, vec_init, pe);
1664 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
1667 /* In case that vectorization factor (VF) is bigger than the number
1668 of elements that we can fit in a vectype (nunits), we have to generate
1669 more than one vector stmt - i.e - we need to "unroll" the
1670 vector stmt by a factor VF/nunits. For more details see documentation
1671 in vectorizable_operation. */
1675 stmt_vec_info prev_stmt_vinfo;
1676 /* FORNOW. This restriction should be relaxed. */
1677 gcc_assert (!nested_in_vect_loop);
1679 /* Create the vector that holds the step of the induction. */
1680 expr = build_int_cst (scalar_type, nunits);
1681 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1683 for (i = 0; i < nunits; i++)
1684 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1685 gcc_assert (CONSTANT_CLASS_P (new_name));
1686 vec = build_vector (vectype, t);
1687 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1689 vec_def = induc_def;
1690 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1691 for (i = 1; i < ncopies; i++)
1695 /* vec_i = vec_prev + vec_step */
1696 tmp = build2 (PLUS_EXPR, vectype, vec_def, vec_step);
1697 new_stmt = build_gimple_modify_stmt (NULL_TREE, tmp);
1698 vec_def = make_ssa_name (vec_dest, new_stmt);
1699 GIMPLE_STMT_OPERAND (new_stmt, 0) = vec_def;
1700 bsi_insert_before (&si, new_stmt, BSI_SAME_STMT);
1701 set_stmt_info (get_stmt_ann (new_stmt),
1702 new_stmt_vec_info (new_stmt, loop_vinfo));
1703 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1704 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1708 if (nested_in_vect_loop)
1710 /* Find the loop-closed exit-phi of the induction, and record
1711 the final vector of induction results: */
1713 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
1715 if (!flow_bb_inside_loop_p (iv_loop, bb_for_stmt (USE_STMT (use_p))))
1717 exit_phi = USE_STMT (use_p);
1723 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
1724 /* FORNOW. Currently not supporting the case that an inner-loop induction
1725 is not used in the outer-loop (i.e. only outside the outer-loop). */
1726 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
1727 && !STMT_VINFO_LIVE_P (stmt_vinfo));
1729 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
1730 if (vect_print_dump_info (REPORT_DETAILS))
1732 fprintf (vect_dump, "vector of inductions after inner-loop:");
1733 print_generic_expr (vect_dump, new_stmt, TDF_SLIM);
1739 if (vect_print_dump_info (REPORT_DETAILS))
1741 fprintf (vect_dump, "transform induction: created def-use cycle:");
1742 print_generic_expr (vect_dump, induction_phi, TDF_SLIM);
1743 fprintf (vect_dump, "\n");
1744 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vec_def), TDF_SLIM);
1747 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1752 /* Function vect_get_vec_def_for_operand.
1754 OP is an operand in STMT. This function returns a (vector) def that will be
1755 used in the vectorized stmt for STMT.
1757 In the case that OP is an SSA_NAME which is defined in the loop, then
1758 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1760 In case OP is an invariant or constant, a new stmt that creates a vector def
1761 needs to be introduced. */
1764 vect_get_vec_def_for_operand (tree op, tree stmt, tree *scalar_def)
1769 stmt_vec_info def_stmt_info = NULL;
1770 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1771 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1772 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1773 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1779 enum vect_def_type dt;
1783 if (vect_print_dump_info (REPORT_DETAILS))
1785 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1786 print_generic_expr (vect_dump, op, TDF_SLIM);
1789 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1790 gcc_assert (is_simple_use);
1791 if (vect_print_dump_info (REPORT_DETAILS))
1795 fprintf (vect_dump, "def = ");
1796 print_generic_expr (vect_dump, def, TDF_SLIM);
1800 fprintf (vect_dump, " def_stmt = ");
1801 print_generic_expr (vect_dump, def_stmt, TDF_SLIM);
1807 /* Case 1: operand is a constant. */
1808 case vect_constant_def:
1813 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1814 if (vect_print_dump_info (REPORT_DETAILS))
1815 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1817 for (i = nunits - 1; i >= 0; --i)
1819 t = tree_cons (NULL_TREE, op, t);
1821 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1822 gcc_assert (vector_type);
1823 vec_cst = build_vector (vector_type, t);
1825 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1828 /* Case 2: operand is defined outside the loop - loop invariant. */
1829 case vect_invariant_def:
1834 /* Create 'vec_inv = {inv,inv,..,inv}' */
1835 if (vect_print_dump_info (REPORT_DETAILS))
1836 fprintf (vect_dump, "Create vector_inv.");
1838 for (i = nunits - 1; i >= 0; --i)
1840 t = tree_cons (NULL_TREE, def, t);
1843 /* FIXME: use build_constructor directly. */
1844 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1845 gcc_assert (vector_type);
1846 vec_inv = build_constructor_from_list (vector_type, t);
1847 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1850 /* Case 3: operand is defined inside the loop. */
1854 *scalar_def = def_stmt;
1856 /* Get the def from the vectorized stmt. */
1857 def_stmt_info = vinfo_for_stmt (def_stmt);
1858 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1859 gcc_assert (vec_stmt);
1860 if (TREE_CODE (vec_stmt) == PHI_NODE)
1861 vec_oprnd = PHI_RESULT (vec_stmt);
1863 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt, 0);
1867 /* Case 4: operand is defined by a loop header phi - reduction */
1868 case vect_reduction_def:
1872 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1873 loop = (bb_for_stmt (def_stmt))->loop_father;
1875 /* Get the def before the loop */
1876 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1877 return get_initial_def_for_reduction (stmt, op, scalar_def);
1880 /* Case 5: operand is defined by loop-header phi - induction. */
1881 case vect_induction_def:
1883 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
1885 /* Get the def from the vectorized stmt. */
1886 def_stmt_info = vinfo_for_stmt (def_stmt);
1887 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1888 gcc_assert (vec_stmt && (TREE_CODE (vec_stmt) == PHI_NODE));
1889 vec_oprnd = PHI_RESULT (vec_stmt);
1899 /* Function vect_get_vec_def_for_stmt_copy
1901 Return a vector-def for an operand. This function is used when the
1902 vectorized stmt to be created (by the caller to this function) is a "copy"
1903 created in case the vectorized result cannot fit in one vector, and several
1904 copies of the vector-stmt are required. In this case the vector-def is
1905 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
1906 of the stmt that defines VEC_OPRND.
1907 DT is the type of the vector def VEC_OPRND.
1910 In case the vectorization factor (VF) is bigger than the number
1911 of elements that can fit in a vectype (nunits), we have to generate
1912 more than one vector stmt to vectorize the scalar stmt. This situation
1913 arises when there are multiple data-types operated upon in the loop; the
1914 smallest data-type determines the VF, and as a result, when vectorizing
1915 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
1916 vector stmt (each computing a vector of 'nunits' results, and together
1917 computing 'VF' results in each iteration). This function is called when
1918 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
1919 which VF=16 and nunits=4, so the number of copies required is 4):
1921 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
1923 S1: x = load VS1.0: vx.0 = memref0 VS1.1
1924 VS1.1: vx.1 = memref1 VS1.2
1925 VS1.2: vx.2 = memref2 VS1.3
1926 VS1.3: vx.3 = memref3
1928 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
1929 VSnew.1: vz1 = vx.1 + ... VSnew.2
1930 VSnew.2: vz2 = vx.2 + ... VSnew.3
1931 VSnew.3: vz3 = vx.3 + ...
1933 The vectorization of S1 is explained in vectorizable_load.
1934 The vectorization of S2:
1935 To create the first vector-stmt out of the 4 copies - VSnew.0 -
1936 the function 'vect_get_vec_def_for_operand' is called to
1937 get the relevant vector-def for each operand of S2. For operand x it
1938 returns the vector-def 'vx.0'.
1940 To create the remaining copies of the vector-stmt (VSnew.j), this
1941 function is called to get the relevant vector-def for each operand. It is
1942 obtained from the respective VS1.j stmt, which is recorded in the
1943 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
1945 For example, to obtain the vector-def 'vx.1' in order to create the
1946 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
1947 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
1948 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
1949 and return its def ('vx.1').
1950 Overall, to create the above sequence this function will be called 3 times:
1951 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
1952 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
1953 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
1956 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
1958 tree vec_stmt_for_operand;
1959 stmt_vec_info def_stmt_info;
1961 /* Do nothing; can reuse same def. */
1962 if (dt == vect_invariant_def || dt == vect_constant_def )
1965 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
1966 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
1967 gcc_assert (def_stmt_info);
1968 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
1969 gcc_assert (vec_stmt_for_operand);
1970 vec_oprnd = GIMPLE_STMT_OPERAND (vec_stmt_for_operand, 0);
1975 /* Get vectorized definitions for the operands to create a copy of an original
1976 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
1979 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
1980 VEC(tree,heap) **vec_oprnds0,
1981 VEC(tree,heap) **vec_oprnds1)
1983 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
1985 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
1986 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
1988 if (vec_oprnds1 && *vec_oprnds1)
1990 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
1991 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
1992 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
1997 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
2000 vect_get_vec_defs (tree op0, tree op1, tree stmt, VEC(tree,heap) **vec_oprnds0,
2001 VEC(tree,heap) **vec_oprnds1, slp_tree slp_node)
2004 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
2009 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
2010 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
2011 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
2015 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
2016 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
2017 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
2023 /* Function vect_finish_stmt_generation.
2025 Insert a new stmt. */
2028 vect_finish_stmt_generation (tree stmt, tree vec_stmt,
2029 block_stmt_iterator *bsi)
2031 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2032 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2034 gcc_assert (stmt == bsi_stmt (*bsi));
2035 gcc_assert (TREE_CODE (stmt) != LABEL_EXPR);
2037 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2039 set_stmt_info (get_stmt_ann (vec_stmt),
2040 new_stmt_vec_info (vec_stmt, loop_vinfo));
2042 if (vect_print_dump_info (REPORT_DETAILS))
2044 fprintf (vect_dump, "add new stmt: ");
2045 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2048 /* Make sure bsi points to the stmt that is being vectorized. */
2049 gcc_assert (stmt == bsi_stmt (*bsi));
2051 #ifdef USE_MAPPED_LOCATION
2052 SET_EXPR_LOCATION (vec_stmt, EXPR_LOCATION (stmt));
2054 SET_EXPR_LOCUS (vec_stmt, EXPR_LOCUS (stmt));
2059 /* Function get_initial_def_for_reduction
2062 STMT - a stmt that performs a reduction operation in the loop.
2063 INIT_VAL - the initial value of the reduction variable
2066 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2067 of the reduction (used for adjusting the epilog - see below).
2068 Return a vector variable, initialized according to the operation that STMT
2069 performs. This vector will be used as the initial value of the
2070 vector of partial results.
2072 Option1 (adjust in epilog): Initialize the vector as follows:
2075 min/max: [init_val,init_val,..,init_val,init_val]
2076 bit and/or: [init_val,init_val,..,init_val,init_val]
2077 and when necessary (e.g. add/mult case) let the caller know
2078 that it needs to adjust the result by init_val.
2080 Option2: Initialize the vector as follows:
2081 add: [0,0,...,0,init_val]
2082 mult: [1,1,...,1,init_val]
2083 min/max: [init_val,init_val,...,init_val]
2084 bit and/or: [init_val,init_val,...,init_val]
2085 and no adjustments are needed.
2087 For example, for the following code:
2093 STMT is 's = s + a[i]', and the reduction variable is 's'.
2094 For a vector of 4 units, we want to return either [0,0,0,init_val],
2095 or [0,0,0,0] and let the caller know that it needs to adjust
2096 the result at the end by 'init_val'.
2098 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2099 initialization vector is simpler (same element in all entries).
2100 A cost model should help decide between these two schemes. */
2103 get_initial_def_for_reduction (tree stmt, tree init_val, tree *adjustment_def)
2105 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2106 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2107 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2108 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2109 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2110 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2111 tree type = TREE_TYPE (init_val);
2118 bool nested_in_vect_loop = false;
2120 gcc_assert (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
2121 if (nested_in_vect_loop_p (loop, stmt))
2122 nested_in_vect_loop = true;
2124 gcc_assert (loop == (bb_for_stmt (stmt))->loop_father);
2126 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
2130 case WIDEN_SUM_EXPR:
2133 if (nested_in_vect_loop)
2134 *adjustment_def = vecdef;
2136 *adjustment_def = init_val;
2137 /* Create a vector of zeros for init_def. */
2138 if (SCALAR_FLOAT_TYPE_P (type))
2139 def_for_init = build_real (type, dconst0);
2141 def_for_init = build_int_cst (type, 0);
2142 for (i = nunits - 1; i >= 0; --i)
2143 t = tree_cons (NULL_TREE, def_for_init, t);
2144 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
2145 gcc_assert (vector_type);
2146 init_def = build_vector (vector_type, t);
2151 *adjustment_def = NULL_TREE;
2163 /* Function vect_create_epilog_for_reduction
2165 Create code at the loop-epilog to finalize the result of a reduction
2168 VECT_DEF is a vector of partial results.
2169 REDUC_CODE is the tree-code for the epilog reduction.
2170 STMT is the scalar reduction stmt that is being vectorized.
2171 REDUCTION_PHI is the phi-node that carries the reduction computation.
2174 1. Creates the reduction def-use cycle: sets the arguments for
2176 The loop-entry argument is the vectorized initial-value of the reduction.
2177 The loop-latch argument is VECT_DEF - the vector of partial sums.
2178 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2179 by applying the operation specified by REDUC_CODE if available, or by
2180 other means (whole-vector shifts or a scalar loop).
2181 The function also creates a new phi node at the loop exit to preserve
2182 loop-closed form, as illustrated below.
2184 The flow at the entry to this function:
2187 vec_def = phi <null, null> # REDUCTION_PHI
2188 VECT_DEF = vector_stmt # vectorized form of STMT
2189 s_loop = scalar_stmt # (scalar) STMT
2191 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2195 The above is transformed by this function into:
2198 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2199 VECT_DEF = vector_stmt # vectorized form of STMT
2200 s_loop = scalar_stmt # (scalar) STMT
2202 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2203 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2204 v_out2 = reduce <v_out1>
2205 s_out3 = extract_field <v_out2, 0>
2206 s_out4 = adjust_result <s_out3>
2212 vect_create_epilog_for_reduction (tree vect_def, tree stmt,
2213 enum tree_code reduc_code, tree reduction_phi)
2215 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2217 enum machine_mode mode;
2218 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2219 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2220 basic_block exit_bb;
2224 block_stmt_iterator exit_bsi;
2226 tree new_temp = NULL_TREE;
2228 tree epilog_stmt = NULL_TREE;
2229 tree new_scalar_dest, exit_phi, new_dest;
2230 tree bitsize, bitpos, bytesize;
2231 enum tree_code code = TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 1));
2232 tree adjustment_def;
2233 tree vec_initial_def;
2235 imm_use_iterator imm_iter;
2236 use_operand_p use_p;
2237 bool extract_scalar_result = false;
2238 tree reduction_op, expr;
2241 tree operation = GIMPLE_STMT_OPERAND (stmt, 1);
2242 bool nested_in_vect_loop = false;
2244 VEC(tree,heap) *phis = NULL;
2247 if (nested_in_vect_loop_p (loop, stmt))
2250 nested_in_vect_loop = true;
2253 op_type = TREE_OPERAND_LENGTH (operation);
2254 reduction_op = TREE_OPERAND (operation, op_type-1);
2255 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2256 gcc_assert (vectype);
2257 mode = TYPE_MODE (vectype);
2259 /*** 1. Create the reduction def-use cycle ***/
2261 /* 1.1 set the loop-entry arg of the reduction-phi: */
2262 /* For the case of reduction, vect_get_vec_def_for_operand returns
2263 the scalar def before the loop, that defines the initial value
2264 of the reduction variable. */
2265 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
2267 add_phi_arg (reduction_phi, vec_initial_def, loop_preheader_edge (loop));
2269 /* 1.2 set the loop-latch arg for the reduction-phi: */
2270 add_phi_arg (reduction_phi, vect_def, loop_latch_edge (loop));
2272 if (vect_print_dump_info (REPORT_DETAILS))
2274 fprintf (vect_dump, "transform reduction: created def-use cycle:");
2275 print_generic_expr (vect_dump, reduction_phi, TDF_SLIM);
2276 fprintf (vect_dump, "\n");
2277 print_generic_expr (vect_dump, SSA_NAME_DEF_STMT (vect_def), TDF_SLIM);
2281 /*** 2. Create epilog code
2282 The reduction epilog code operates across the elements of the vector
2283 of partial results computed by the vectorized loop.
2284 The reduction epilog code consists of:
2285 step 1: compute the scalar result in a vector (v_out2)
2286 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2287 step 3: adjust the scalar result (s_out3) if needed.
2289 Step 1 can be accomplished using one the following three schemes:
2290 (scheme 1) using reduc_code, if available.
2291 (scheme 2) using whole-vector shifts, if available.
2292 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2295 The overall epilog code looks like this:
2297 s_out0 = phi <s_loop> # original EXIT_PHI
2298 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2299 v_out2 = reduce <v_out1> # step 1
2300 s_out3 = extract_field <v_out2, 0> # step 2
2301 s_out4 = adjust_result <s_out3> # step 3
2303 (step 3 is optional, and step2 1 and 2 may be combined).
2304 Lastly, the uses of s_out0 are replaced by s_out4.
2308 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2309 v_out1 = phi <v_loop> */
2311 exit_bb = single_exit (loop)->dest;
2312 new_phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
2313 SET_PHI_ARG_DEF (new_phi, single_exit (loop)->dest_idx, vect_def);
2314 exit_bsi = bsi_after_labels (exit_bb);
2316 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2317 (i.e. when reduc_code is not available) and in the final adjustment
2318 code (if needed). Also get the original scalar reduction variable as
2319 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
2320 represents a reduction pattern), the tree-code and scalar-def are
2321 taken from the original stmt that the pattern-stmt (STMT) replaces.
2322 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2323 are taken from STMT. */
2325 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2328 /* Regular reduction */
2333 /* Reduction pattern */
2334 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
2335 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2336 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2338 code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2339 scalar_dest = GIMPLE_STMT_OPERAND (orig_stmt, 0);
2340 scalar_type = TREE_TYPE (scalar_dest);
2341 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
2342 bitsize = TYPE_SIZE (scalar_type);
2343 bytesize = TYPE_SIZE_UNIT (scalar_type);
2346 /* In case this is a reduction in an inner-loop while vectorizing an outer
2347 loop - we don't need to extract a single scalar result at the end of the
2348 inner-loop. The final vector of partial results will be used in the
2349 vectorized outer-loop, or reduced to a scalar result at the end of the
2351 if (nested_in_vect_loop)
2352 goto vect_finalize_reduction;
2354 /* 2.3 Create the reduction code, using one of the three schemes described
2357 if (reduc_code < NUM_TREE_CODES)
2361 /*** Case 1: Create:
2362 v_out2 = reduc_expr <v_out1> */
2364 if (vect_print_dump_info (REPORT_DETAILS))
2365 fprintf (vect_dump, "Reduce using direct vector reduction.");
2367 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2368 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
2369 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2370 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2371 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2372 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2374 extract_scalar_result = true;
2378 enum tree_code shift_code = 0;
2379 bool have_whole_vector_shift = true;
2381 int element_bitsize = tree_low_cst (bitsize, 1);
2382 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2385 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
2386 shift_code = VEC_RSHIFT_EXPR;
2388 have_whole_vector_shift = false;
2390 /* Regardless of whether we have a whole vector shift, if we're
2391 emulating the operation via tree-vect-generic, we don't want
2392 to use it. Only the first round of the reduction is likely
2393 to still be profitable via emulation. */
2394 /* ??? It might be better to emit a reduction tree code here, so that
2395 tree-vect-generic can expand the first round via bit tricks. */
2396 if (!VECTOR_MODE_P (mode))
2397 have_whole_vector_shift = false;
2400 optab optab = optab_for_tree_code (code, vectype);
2401 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
2402 have_whole_vector_shift = false;
2405 if (have_whole_vector_shift)
2407 /*** Case 2: Create:
2408 for (offset = VS/2; offset >= element_size; offset/=2)
2410 Create: va' = vec_shift <va, offset>
2411 Create: va = vop <va, va'>
2414 if (vect_print_dump_info (REPORT_DETAILS))
2415 fprintf (vect_dump, "Reduce using vector shifts");
2417 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2418 new_temp = PHI_RESULT (new_phi);
2420 for (bit_offset = vec_size_in_bits/2;
2421 bit_offset >= element_bitsize;
2424 tree bitpos = size_int (bit_offset);
2425 tree tmp = build2 (shift_code, vectype, new_temp, bitpos);
2426 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2427 new_name = make_ssa_name (vec_dest, epilog_stmt);
2428 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2429 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2431 tmp = build2 (code, vectype, new_name, new_temp);
2432 epilog_stmt = build_gimple_modify_stmt (vec_dest, tmp);
2433 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2434 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2435 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2438 extract_scalar_result = true;
2444 /*** Case 3: Create:
2445 s = extract_field <v_out2, 0>
2446 for (offset = element_size;
2447 offset < vector_size;
2448 offset += element_size;)
2450 Create: s' = extract_field <v_out2, offset>
2451 Create: s = op <s, s'>
2454 if (vect_print_dump_info (REPORT_DETAILS))
2455 fprintf (vect_dump, "Reduce using scalar code. ");
2457 vec_temp = PHI_RESULT (new_phi);
2458 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2459 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2461 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2462 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2463 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2464 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2465 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2467 for (bit_offset = element_bitsize;
2468 bit_offset < vec_size_in_bits;
2469 bit_offset += element_bitsize)
2472 tree bitpos = bitsize_int (bit_offset);
2473 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2476 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2477 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2478 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
2479 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_name;
2480 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2482 tmp = build2 (code, scalar_type, new_name, new_temp);
2483 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, tmp);
2484 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2485 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2486 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2489 extract_scalar_result = false;
2493 /* 2.4 Extract the final scalar result. Create:
2494 s_out3 = extract_field <v_out2, bitpos> */
2496 if (extract_scalar_result)
2500 gcc_assert (!nested_in_vect_loop);
2501 if (vect_print_dump_info (REPORT_DETAILS))
2502 fprintf (vect_dump, "extract scalar result");
2504 if (BYTES_BIG_ENDIAN)
2505 bitpos = size_binop (MULT_EXPR,
2506 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
2507 TYPE_SIZE (scalar_type));
2509 bitpos = bitsize_zero_node;
2511 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
2512 BIT_FIELD_REF_UNSIGNED (rhs) = TYPE_UNSIGNED (scalar_type);
2513 epilog_stmt = build_gimple_modify_stmt (new_scalar_dest, rhs);
2514 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2515 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2516 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2519 vect_finalize_reduction:
2521 /* 2.5 Adjust the final result by the initial value of the reduction
2522 variable. (When such adjustment is not needed, then
2523 'adjustment_def' is zero). For example, if code is PLUS we create:
2524 new_temp = loop_exit_def + adjustment_def */
2528 if (nested_in_vect_loop)
2530 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
2531 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
2532 new_dest = vect_create_destination_var (scalar_dest, vectype);
2536 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
2537 expr = build2 (code, scalar_type, new_temp, adjustment_def);
2538 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
2540 epilog_stmt = build_gimple_modify_stmt (new_dest, expr);
2541 new_temp = make_ssa_name (new_dest, epilog_stmt);
2542 GIMPLE_STMT_OPERAND (epilog_stmt, 0) = new_temp;
2543 bsi_insert_before (&exit_bsi, epilog_stmt, BSI_SAME_STMT);
2547 /* 2.6 Handle the loop-exit phi */
2549 /* Replace uses of s_out0 with uses of s_out3:
2550 Find the loop-closed-use at the loop exit of the original scalar result.
2551 (The reduction result is expected to have two immediate uses - one at the
2552 latch block, and one at the loop exit). */
2553 phis = VEC_alloc (tree, heap, 10);
2554 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
2556 if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
2558 exit_phi = USE_STMT (use_p);
2559 VEC_quick_push (tree, phis, exit_phi);
2562 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
2563 gcc_assert (!VEC_empty (tree, phis));
2565 for (i = 0; VEC_iterate (tree, phis, i, exit_phi); i++)
2567 if (nested_in_vect_loop)
2569 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
2571 /* FORNOW. Currently not supporting the case that an inner-loop reduction
2572 is not used in the outer-loop (but only outside the outer-loop). */
2573 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2574 && !STMT_VINFO_LIVE_P (stmt_vinfo));
2576 epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
2577 STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
2578 set_stmt_info (get_stmt_ann (epilog_stmt),
2579 new_stmt_vec_info (epilog_stmt, loop_vinfo));
2583 /* Replace the uses: */
2584 orig_name = PHI_RESULT (exit_phi);
2585 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
2586 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2587 SET_USE (use_p, new_temp);
2589 VEC_free (tree, heap, phis);
2593 /* Function vectorizable_reduction.
2595 Check if STMT performs a reduction operation that can be vectorized.
2596 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2597 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2598 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2600 This function also handles reduction idioms (patterns) that have been
2601 recognized in advance during vect_pattern_recog. In this case, STMT may be
2603 X = pattern_expr (arg0, arg1, ..., X)
2604 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original
2605 sequence that had been detected and replaced by the pattern-stmt (STMT).
2607 In some cases of reduction patterns, the type of the reduction variable X is
2608 different than the type of the other arguments of STMT.
2609 In such cases, the vectype that is used when transforming STMT into a vector
2610 stmt is different than the vectype that is used to determine the
2611 vectorization factor, because it consists of a different number of elements
2612 than the actual number of elements that are being operated upon in parallel.
2614 For example, consider an accumulation of shorts into an int accumulator.
2615 On some targets it's possible to vectorize this pattern operating on 8
2616 shorts at a time (hence, the vectype for purposes of determining the
2617 vectorization factor should be V8HI); on the other hand, the vectype that
2618 is used to create the vector form is actually V4SI (the type of the result).
2620 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2621 indicates what is the actual level of parallelism (V8HI in the example), so
2622 that the right vectorization factor would be derived. This vectype
2623 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2624 be used to create the vectorized stmt. The right vectype for the vectorized
2625 stmt is obtained from the type of the result X:
2626 get_vectype_for_scalar_type (TREE_TYPE (X))
2628 This means that, contrary to "regular" reductions (or "regular" stmts in
2629 general), the following equation:
2630 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2631 does *NOT* necessarily hold for reduction patterns. */
2634 vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2639 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2640 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2641 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2642 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2643 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2645 enum tree_code code, orig_code, epilog_reduc_code = 0;
2646 enum machine_mode vec_mode;
2648 optab optab, reduc_optab;
2649 tree new_temp = NULL_TREE;
2651 enum vect_def_type dt;
2656 stmt_vec_info orig_stmt_info;
2657 tree expr = NULL_TREE;
2659 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2660 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2661 stmt_vec_info prev_stmt_info;
2663 tree new_stmt = NULL_TREE;
2666 if (nested_in_vect_loop_p (loop, stmt))
2669 /* FORNOW. This restriction should be relaxed. */
2672 if (vect_print_dump_info (REPORT_DETAILS))
2673 fprintf (vect_dump, "multiple types in nested loop.");
2678 gcc_assert (ncopies >= 1);
2680 /* FORNOW: SLP not supported. */
2681 if (STMT_SLP_TYPE (stmt_info))
2684 /* 1. Is vectorizable reduction? */
2686 /* Not supportable if the reduction variable is used in the loop. */
2687 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
2690 /* Reductions that are not used even in an enclosing outer-loop,
2691 are expected to be "live" (used out of the loop). */
2692 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
2693 && !STMT_VINFO_LIVE_P (stmt_info))
2696 /* Make sure it was already recognized as a reduction computation. */
2697 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2700 /* 2. Has this been recognized as a reduction pattern?
2702 Check if STMT represents a pattern that has been recognized
2703 in earlier analysis stages. For stmts that represent a pattern,
2704 the STMT_VINFO_RELATED_STMT field records the last stmt in
2705 the original sequence that constitutes the pattern. */
2707 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2710 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2711 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2712 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2713 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2716 /* 3. Check the operands of the operation. The first operands are defined
2717 inside the loop body. The last operand is the reduction variable,
2718 which is defined by the loop-header-phi. */
2720 gcc_assert (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);
2722 operation = GIMPLE_STMT_OPERAND (stmt, 1);
2723 code = TREE_CODE (operation);
2724 op_type = TREE_OPERAND_LENGTH (operation);
2725 if (op_type != binary_op && op_type != ternary_op)
2727 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
2728 scalar_type = TREE_TYPE (scalar_dest);
2729 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
2730 && !SCALAR_FLOAT_TYPE_P (scalar_type))
2733 /* All uses but the last are expected to be defined in the loop.
2734 The last use is the reduction variable. */
2735 for (i = 0; i < op_type-1; i++)
2737 op = TREE_OPERAND (operation, i);
2738 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2739 gcc_assert (is_simple_use);
2740 if (dt != vect_loop_def
2741 && dt != vect_invariant_def
2742 && dt != vect_constant_def
2743 && dt != vect_induction_def)
2747 op = TREE_OPERAND (operation, i);
2748 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
2749 gcc_assert (is_simple_use);
2750 gcc_assert (dt == vect_reduction_def);
2751 gcc_assert (TREE_CODE (def_stmt) == PHI_NODE);
2753 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2755 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2757 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2760 /* 4. Supportable by target? */
2762 /* 4.1. check support for the operation in the loop */
2763 optab = optab_for_tree_code (code, vectype);
2766 if (vect_print_dump_info (REPORT_DETAILS))
2767 fprintf (vect_dump, "no optab.");
2770 vec_mode = TYPE_MODE (vectype);
2771 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
2773 if (vect_print_dump_info (REPORT_DETAILS))
2774 fprintf (vect_dump, "op not supported by target.");
2775 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2776 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2777 < vect_min_worthwhile_factor (code))
2779 if (vect_print_dump_info (REPORT_DETAILS))
2780 fprintf (vect_dump, "proceeding using word mode.");
2783 /* Worthwhile without SIMD support? */
2784 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2785 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2786 < vect_min_worthwhile_factor (code))
2788 if (vect_print_dump_info (REPORT_DETAILS))
2789 fprintf (vect_dump, "not worthwhile without SIMD support.");
2793 /* 4.2. Check support for the epilog operation.
2795 If STMT represents a reduction pattern, then the type of the
2796 reduction variable may be different than the type of the rest
2797 of the arguments. For example, consider the case of accumulation
2798 of shorts into an int accumulator; The original code:
2799 S1: int_a = (int) short_a;
2800 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2803 STMT: int_acc = widen_sum <short_a, int_acc>
2806 1. The tree-code that is used to create the vector operation in the
2807 epilog code (that reduces the partial results) is not the
2808 tree-code of STMT, but is rather the tree-code of the original
2809 stmt from the pattern that STMT is replacing. I.e, in the example
2810 above we want to use 'widen_sum' in the loop, but 'plus' in the
2812 2. The type (mode) we use to check available target support
2813 for the vector operation to be created in the *epilog*, is
2814 determined by the type of the reduction variable (in the example
2815 above we'd check this: plus_optab[vect_int_mode]).
2816 However the type (mode) we use to check available target support
2817 for the vector operation to be created *inside the loop*, is
2818 determined by the type of the other arguments to STMT (in the
2819 example we'd check this: widen_sum_optab[vect_short_mode]).
2821 This is contrary to "regular" reductions, in which the types of all
2822 the arguments are the same as the type of the reduction variable.
2823 For "regular" reductions we can therefore use the same vector type
2824 (and also the same tree-code) when generating the epilog code and
2825 when generating the code inside the loop. */
2829 /* This is a reduction pattern: get the vectype from the type of the
2830 reduction variable, and get the tree-code from orig_stmt. */
2831 orig_code = TREE_CODE (GIMPLE_STMT_OPERAND (orig_stmt, 1));
2832 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
2835 if (vect_print_dump_info (REPORT_DETAILS))
2837 fprintf (vect_dump, "unsupported data-type ");
2838 print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
2843 vec_mode = TYPE_MODE (vectype);
2847 /* Regular reduction: use the same vectype and tree-code as used for
2848 the vector code inside the loop can be used for the epilog code. */
2852 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
2854 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype);
2857 if (vect_print_dump_info (REPORT_DETAILS))
2858 fprintf (vect_dump, "no optab for reduction.");
2859 epilog_reduc_code = NUM_TREE_CODES;
2861 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
2863 if (vect_print_dump_info (REPORT_DETAILS))
2864 fprintf (vect_dump, "reduc op not supported by target.");
2865 epilog_reduc_code = NUM_TREE_CODES;
2868 if (!vec_stmt) /* transformation not required. */
2870 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
2871 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
2878 if (vect_print_dump_info (REPORT_DETAILS))
2879 fprintf (vect_dump, "transform reduction.");
2881 /* Create the destination vector */
2882 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2884 /* Create the reduction-phi that defines the reduction-operand. */
2885 new_phi = create_phi_node (vec_dest, loop->header);
2887 /* In case the vectorization factor (VF) is bigger than the number
2888 of elements that we can fit in a vectype (nunits), we have to generate
2889 more than one vector stmt - i.e - we need to "unroll" the
2890 vector stmt by a factor VF/nunits. For more details see documentation
2891 in vectorizable_operation. */
2893 prev_stmt_info = NULL;
2894 for (j = 0; j < ncopies; j++)
2899 op = TREE_OPERAND (operation, 0);
2900 loop_vec_def0 = vect_get_vec_def_for_operand (op, stmt, NULL);
2901 if (op_type == ternary_op)
2903 op = TREE_OPERAND (operation, 1);
2904 loop_vec_def1 = vect_get_vec_def_for_operand (op, stmt, NULL);
2907 /* Get the vector def for the reduction variable from the phi node */
2908 reduc_def = PHI_RESULT (new_phi);
2912 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
2913 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
2914 if (op_type == ternary_op)
2915 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
2917 /* Get the vector def for the reduction variable from the vectorized
2918 reduction operation generated in the previous iteration (j-1) */
2919 reduc_def = GIMPLE_STMT_OPERAND (new_stmt ,0);
2922 /* Arguments are ready. create the new vector stmt. */
2923 if (op_type == binary_op)
2924 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
2926 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
2928 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
2929 new_temp = make_ssa_name (vec_dest, new_stmt);
2930 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
2931 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2934 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
2936 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
2937 prev_stmt_info = vinfo_for_stmt (new_stmt);
2940 /* Finalize the reduction-phi (set it's arguments) and create the
2941 epilog reduction code. */
2942 vect_create_epilog_for_reduction (new_temp, stmt, epilog_reduc_code, new_phi);
2946 /* Checks if CALL can be vectorized in type VECTYPE. Returns
2947 a function declaration if the target has a vectorized version
2948 of the function, or NULL_TREE if the function cannot be vectorized. */
2951 vectorizable_function (tree call, tree vectype_out, tree vectype_in)
2953 tree fndecl = get_callee_fndecl (call);
2954 enum built_in_function code;
2956 /* We only handle functions that do not read or clobber memory -- i.e.
2957 const or novops ones. */
2958 if (!(call_expr_flags (call) & (ECF_CONST | ECF_NOVOPS)))
2962 || TREE_CODE (fndecl) != FUNCTION_DECL
2963 || !DECL_BUILT_IN (fndecl))
2966 code = DECL_FUNCTION_CODE (fndecl);
2967 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
2971 /* Function vectorizable_call.
2973 Check if STMT performs a function call that can be vectorized.
2974 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2975 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2976 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2979 vectorizable_call (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2985 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
2986 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
2987 tree vectype_out, vectype_in;
2990 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2991 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2992 tree fndecl, rhs, new_temp, def, def_stmt, rhs_type, lhs_type;
2993 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
2995 int ncopies, j, nargs;
2996 call_expr_arg_iterator iter;
2998 enum { NARROW, NONE, WIDEN } modifier;
3000 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3003 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3006 /* FORNOW: SLP not supported. */
3007 if (STMT_SLP_TYPE (stmt_info))
3010 /* Is STMT a vectorizable call? */
3011 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3014 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3017 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3018 if (TREE_CODE (operation) != CALL_EXPR)
3021 /* Process function arguments. */
3022 rhs_type = NULL_TREE;
3024 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3026 /* Bail out if the function has more than two arguments, we
3027 do not have interesting builtin functions to vectorize with
3028 more than two arguments. */
3032 /* We can only handle calls with arguments of the same type. */
3034 && rhs_type != TREE_TYPE (op))
3036 if (vect_print_dump_info (REPORT_DETAILS))
3037 fprintf (vect_dump, "argument types differ.");
3040 rhs_type = TREE_TYPE (op);
3042 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[nargs]))
3044 if (vect_print_dump_info (REPORT_DETAILS))
3045 fprintf (vect_dump, "use not simple.");
3052 /* No arguments is also not good. */
3056 vectype_in = get_vectype_for_scalar_type (rhs_type);
3059 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3061 lhs_type = TREE_TYPE (GIMPLE_STMT_OPERAND (stmt, 0));
3062 vectype_out = get_vectype_for_scalar_type (lhs_type);
3065 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3068 if (nunits_in == nunits_out / 2)
3070 else if (nunits_out == nunits_in)
3072 else if (nunits_out == nunits_in / 2)
3077 /* For now, we only vectorize functions if a target specific builtin
3078 is available. TODO -- in some cases, it might be profitable to
3079 insert the calls for pieces of the vector, in order to be able
3080 to vectorize other operations in the loop. */
3081 fndecl = vectorizable_function (operation, vectype_out, vectype_in);
3082 if (fndecl == NULL_TREE)
3084 if (vect_print_dump_info (REPORT_DETAILS))
3085 fprintf (vect_dump, "function is not vectorizable.");
3090 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
3092 if (modifier == NARROW)
3093 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3095 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3097 /* Sanity check: make sure that at least one copy of the vectorized stmt
3098 needs to be generated. */
3099 gcc_assert (ncopies >= 1);
3101 /* FORNOW. This restriction should be relaxed. */
3102 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3104 if (vect_print_dump_info (REPORT_DETAILS))
3105 fprintf (vect_dump, "multiple types in nested loop.");
3109 if (!vec_stmt) /* transformation not required. */
3111 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3112 if (vect_print_dump_info (REPORT_DETAILS))
3113 fprintf (vect_dump, "=== vectorizable_call ===");
3114 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3120 if (vect_print_dump_info (REPORT_DETAILS))
3121 fprintf (vect_dump, "transform operation.");
3123 /* FORNOW. This restriction should be relaxed. */
3124 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3126 if (vect_print_dump_info (REPORT_DETAILS))
3127 fprintf (vect_dump, "multiple types in nested loop.");
3132 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3133 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3135 prev_stmt_info = NULL;
3139 for (j = 0; j < ncopies; ++j)
3141 /* Build argument list for the vectorized call. */
3142 /* FIXME: Rewrite this so that it doesn't
3143 construct a temporary list. */
3146 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3150 = vect_get_vec_def_for_operand (op, stmt, NULL);
3153 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3155 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3159 vargs = nreverse (vargs);
3161 rhs = build_function_call_expr (fndecl, vargs);
3162 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3163 new_temp = make_ssa_name (vec_dest, new_stmt);
3164 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3166 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3169 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3171 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3173 prev_stmt_info = vinfo_for_stmt (new_stmt);
3179 for (j = 0; j < ncopies; ++j)
3181 /* Build argument list for the vectorized call. */
3182 /* FIXME: Rewrite this so that it doesn't
3183 construct a temporary list. */
3186 FOR_EACH_CALL_EXPR_ARG (op, iter, operation)
3191 = vect_get_vec_def_for_operand (op, stmt, NULL);
3193 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3198 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
3200 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3203 vargs = tree_cons (NULL_TREE, vec_oprnd0, vargs);
3204 vargs = tree_cons (NULL_TREE, vec_oprnd1, vargs);
3208 vargs = nreverse (vargs);
3210 rhs = build_function_call_expr (fndecl, vargs);
3211 new_stmt = build_gimple_modify_stmt (vec_dest, rhs);
3212 new_temp = make_ssa_name (vec_dest, new_stmt);
3213 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3215 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3218 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3220 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3222 prev_stmt_info = vinfo_for_stmt (new_stmt);
3225 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3230 /* No current target implements this case. */
3234 /* The call in STMT might prevent it from being removed in dce.
3235 We however cannot remove it here, due to the way the ssa name
3236 it defines is mapped to the new definition. So just replace
3237 rhs of the statement with something harmless. */
3238 type = TREE_TYPE (scalar_dest);
3239 GIMPLE_STMT_OPERAND (stmt, 1) = fold_convert (type, integer_zero_node);
3246 /* Function vect_gen_widened_results_half
3248 Create a vector stmt whose code, type, number of arguments, and result
3249 variable are CODE, VECTYPE, OP_TYPE, and VEC_DEST, and its arguments are
3250 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3251 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3252 needs to be created (DECL is a function-decl of a target-builtin).
3253 STMT is the original scalar stmt that we are vectorizing. */
3256 vect_gen_widened_results_half (enum tree_code code, tree vectype, tree decl,
3257 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3258 tree vec_dest, block_stmt_iterator *bsi,
3267 /* Generate half of the widened result: */
3268 if (code == CALL_EXPR)
3270 /* Target specific support */
3271 if (op_type == binary_op)
3272 expr = build_call_expr (decl, 2, vec_oprnd0, vec_oprnd1);
3274 expr = build_call_expr (decl, 1, vec_oprnd0);
3278 /* Generic support */
3279 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3280 if (op_type == binary_op)
3281 expr = build2 (code, vectype, vec_oprnd0, vec_oprnd1);
3283 expr = build1 (code, vectype, vec_oprnd0);
3285 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3286 new_temp = make_ssa_name (vec_dest, new_stmt);
3287 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3288 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3290 if (code == CALL_EXPR)
3292 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
3294 if (TREE_CODE (sym) == SSA_NAME)
3295 sym = SSA_NAME_VAR (sym);
3296 mark_sym_for_renaming (sym);
3304 /* Check if STMT performs a conversion operation, that can be vectorized.
3305 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3306 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3307 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3310 vectorizable_conversion (tree stmt, block_stmt_iterator *bsi,
3311 tree *vec_stmt, slp_tree slp_node)
3317 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3318 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3319 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3320 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3321 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3322 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3325 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3326 tree new_stmt = NULL_TREE;
3327 stmt_vec_info prev_stmt_info;
3330 tree vectype_out, vectype_in;
3333 tree rhs_type, lhs_type;
3335 enum { NARROW, NONE, WIDEN } modifier;
3337 VEC(tree,heap) *vec_oprnds0 = NULL;
3340 /* Is STMT a vectorizable conversion? */
3342 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3345 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3348 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3351 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3354 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3355 code = TREE_CODE (operation);
3356 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3359 /* Check types of lhs and rhs. */
3360 op0 = TREE_OPERAND (operation, 0);
3361 rhs_type = TREE_TYPE (op0);
3362 vectype_in = get_vectype_for_scalar_type (rhs_type);
3365 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3367 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3368 lhs_type = TREE_TYPE (scalar_dest);
3369 vectype_out = get_vectype_for_scalar_type (lhs_type);
3372 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3375 if (nunits_in == nunits_out / 2)
3377 else if (nunits_out == nunits_in)
3379 else if (nunits_out == nunits_in / 2)
3384 if (modifier == NONE)
3385 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
3387 /* Bail out if the types are both integral or non-integral. */
3388 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
3389 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
3392 if (modifier == NARROW)
3393 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3395 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3397 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3398 this, so we can safely override NCOPIES with 1 here. */
3402 /* Sanity check: make sure that at least one copy of the vectorized stmt
3403 needs to be generated. */
3404 gcc_assert (ncopies >= 1);
3406 /* FORNOW. This restriction should be relaxed. */
3407 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3409 if (vect_print_dump_info (REPORT_DETAILS))
3410 fprintf (vect_dump, "multiple types in nested loop.");
3414 /* Check the operands of the operation. */
3415 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3417 if (vect_print_dump_info (REPORT_DETAILS))
3418 fprintf (vect_dump, "use not simple.");
3422 /* Supportable by target? */
3423 if ((modifier == NONE
3424 && !targetm.vectorize.builtin_conversion (code, vectype_in))
3425 || (modifier == WIDEN
3426 && !supportable_widening_operation (code, stmt, vectype_in,
3429 || (modifier == NARROW
3430 && !supportable_narrowing_operation (code, stmt, vectype_in,
3433 if (vect_print_dump_info (REPORT_DETAILS))
3434 fprintf (vect_dump, "op not supported by target.");
3438 if (modifier != NONE)
3440 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3441 /* FORNOW: SLP not supported. */
3442 if (STMT_SLP_TYPE (stmt_info))
3446 if (!vec_stmt) /* transformation not required. */
3448 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3453 if (vect_print_dump_info (REPORT_DETAILS))
3454 fprintf (vect_dump, "transform conversion.");
3457 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3459 if (modifier == NONE && !slp_node)
3460 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3462 prev_stmt_info = NULL;
3466 for (j = 0; j < ncopies; j++)
3472 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
3474 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3477 targetm.vectorize.builtin_conversion (code, vectype_in);
3478 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3480 new_stmt = build_call_expr (builtin_decl, 1, vop0);
3482 /* Arguments are ready. create the new vector stmt. */
3483 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
3484 new_temp = make_ssa_name (vec_dest, new_stmt);
3485 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3486 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3487 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
3488 SSA_OP_ALL_VIRTUALS)
3490 if (TREE_CODE (sym) == SSA_NAME)
3491 sym = SSA_NAME_VAR (sym);
3492 mark_sym_for_renaming (sym);
3495 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3499 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3501 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3502 prev_stmt_info = vinfo_for_stmt (new_stmt);
3507 /* In case the vectorization factor (VF) is bigger than the number
3508 of elements that we can fit in a vectype (nunits), we have to
3509 generate more than one vector stmt - i.e - we need to "unroll"
3510 the vector stmt by a factor VF/nunits. */
3511 for (j = 0; j < ncopies; j++)
3514 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3516 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3518 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3520 /* Generate first half of the widened result: */
3522 = vect_gen_widened_results_half (code1, vectype_out, decl1,
3523 vec_oprnd0, vec_oprnd1,
3524 unary_op, vec_dest, bsi, stmt);
3526 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3528 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3529 prev_stmt_info = vinfo_for_stmt (new_stmt);
3531 /* Generate second half of the widened result: */
3533 = vect_gen_widened_results_half (code2, vectype_out, decl2,
3534 vec_oprnd0, vec_oprnd1,
3535 unary_op, vec_dest, bsi, stmt);
3536 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3537 prev_stmt_info = vinfo_for_stmt (new_stmt);
3542 /* In case the vectorization factor (VF) is bigger than the number
3543 of elements that we can fit in a vectype (nunits), we have to
3544 generate more than one vector stmt - i.e - we need to "unroll"
3545 the vector stmt by a factor VF/nunits. */
3546 for (j = 0; j < ncopies; j++)
3551 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3552 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3556 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3557 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3560 /* Arguments are ready. Create the new vector stmt. */
3561 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3562 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
3563 new_temp = make_ssa_name (vec_dest, new_stmt);
3564 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
3565 vect_finish_stmt_generation (stmt, new_stmt, bsi);
3568 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3570 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3572 prev_stmt_info = vinfo_for_stmt (new_stmt);
3575 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3582 /* Function vectorizable_assignment.
3584 Check if STMT performs an assignment (copy) that can be vectorized.
3585 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3586 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3587 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3590 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3596 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3597 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3598 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3601 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3602 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3603 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3605 VEC(tree,heap) *vec_oprnds = NULL;
3608 gcc_assert (ncopies >= 1);
3610 return false; /* FORNOW */
3612 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3615 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3618 /* Is vectorizable assignment? */
3619 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3622 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3623 if (TREE_CODE (scalar_dest) != SSA_NAME)
3626 op = GIMPLE_STMT_OPERAND (stmt, 1);
3627 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
3629 if (vect_print_dump_info (REPORT_DETAILS))
3630 fprintf (vect_dump, "use not simple.");
3634 if (!vec_stmt) /* transformation not required. */
3636 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
3637 if (vect_print_dump_info (REPORT_DETAILS))
3638 fprintf (vect_dump, "=== vectorizable_assignment ===");
3639 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3644 if (vect_print_dump_info (REPORT_DETAILS))
3645 fprintf (vect_dump, "transform assignment.");
3648 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3651 vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);
3653 /* Arguments are ready. create the new vector stmt. */
3654 for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
3656 *vec_stmt = build_gimple_modify_stmt (vec_dest, vop);
3657 new_temp = make_ssa_name (vec_dest, *vec_stmt);
3658 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
3659 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
3660 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
3663 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
3666 VEC_free (tree, heap, vec_oprnds);
3671 /* Function vect_min_worthwhile_factor.
3673 For a loop where we could vectorize the operation indicated by CODE,
3674 return the minimum vectorization factor that makes it worthwhile
3675 to use generic vectors. */
3677 vect_min_worthwhile_factor (enum tree_code code)
3698 /* Function vectorizable_induction
3700 Check if PHI performs an induction computation that can be vectorized.
3701 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
3702 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
3703 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3706 vectorizable_induction (tree phi, block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
3709 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
3710 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3711 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3712 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3713 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
3716 gcc_assert (ncopies >= 1);
3718 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3721 /* FORNOW: SLP not supported. */
3722 if (STMT_SLP_TYPE (stmt_info))
3725 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);
3727 if (TREE_CODE (phi) != PHI_NODE)
3730 if (!vec_stmt) /* transformation not required. */
3732 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
3733 if (vect_print_dump_info (REPORT_DETAILS))
3734 fprintf (vect_dump, "=== vectorizable_induction ===");
3735 vect_model_induction_cost (stmt_info, ncopies);
3741 if (vect_print_dump_info (REPORT_DETAILS))
3742 fprintf (vect_dump, "transform induction phi.");
3744 vec_def = get_initial_def_for_induction (phi);
3745 *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
3750 /* Function vectorizable_operation.
3752 Check if STMT performs a binary or unary operation that can be vectorized.
3753 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3754 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3755 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3758 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
3764 tree op0, op1 = NULL;
3765 tree vec_oprnd1 = NULL_TREE;
3766 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3767 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3768 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3769 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3770 enum tree_code code;
3771 enum machine_mode vec_mode;
3776 enum machine_mode optab_op2_mode;
3778 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3779 tree new_stmt = NULL_TREE;
3780 stmt_vec_info prev_stmt_info;
3781 int nunits_in = TYPE_VECTOR_SUBPARTS (vectype);
3784 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3786 VEC(tree,heap) *vec_oprnds0 = NULL, *vec_oprnds1 = NULL;
3789 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3790 this, so we can safely override NCOPIES with 1 here. */
3793 gcc_assert (ncopies >= 1);
3794 /* FORNOW. This restriction should be relaxed. */
3795 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
3797 if (vect_print_dump_info (REPORT_DETAILS))
3798 fprintf (vect_dump, "multiple types in nested loop.");
3802 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3805 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3808 /* Is STMT a vectorizable binary/unary operation? */
3809 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
3812 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
3815 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
3816 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
3819 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3820 if (nunits_out != nunits_in)
3823 operation = GIMPLE_STMT_OPERAND (stmt, 1);
3824 code = TREE_CODE (operation);
3826 /* For pointer addition, we should use the normal plus for
3827 the vector addition. */
3828 if (code == POINTER_PLUS_EXPR)
3831 optab = optab_for_tree_code (code, vectype);
3833 /* Support only unary or binary operations. */
3834 op_type = TREE_OPERAND_LENGTH (operation);
3835 if (op_type != unary_op && op_type != binary_op)
3837 if (vect_print_dump_info (REPORT_DETAILS))
3838 fprintf (vect_dump, "num. args = %d (not unary/binary op).", op_type);
3842 op0 = TREE_OPERAND (operation, 0);
3843 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3845 if (vect_print_dump_info (REPORT_DETAILS))
3846 fprintf (vect_dump, "use not simple.");
3850 if (op_type == binary_op)
3852 op1 = TREE_OPERAND (operation, 1);
3853 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
3855 if (vect_print_dump_info (REPORT_DETAILS))
3856 fprintf (vect_dump, "use not simple.");
3861 /* Supportable by target? */
3864 if (vect_print_dump_info (REPORT_DETAILS))
3865 fprintf (vect_dump, "no optab.");
3868 vec_mode = TYPE_MODE (vectype);
3869 icode = (int) optab_handler (optab, vec_mode)->insn_code;
3870 if (icode == CODE_FOR_nothing)
3872 if (vect_print_dump_info (REPORT_DETAILS))
3873 fprintf (vect_dump, "op not supported by target.");
3874 /* Check only during analysis. */
3875 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
3876 || (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3877 < vect_min_worthwhile_factor (code)
3880 if (vect_print_dump_info (REPORT_DETAILS))
3881 fprintf (vect_dump, "proceeding using word mode.");
3884 /* Worthwhile without SIMD support? Check only during analysis. */
3885 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
3886 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
3887 < vect_min_worthwhile_factor (code)
3890 if (vect_print_dump_info (REPORT_DETAILS))
3891 fprintf (vect_dump, "not worthwhile without SIMD support.");
3895 if (code == LSHIFT_EXPR || code == RSHIFT_EXPR)
3897 /* FORNOW: not yet supported. */
3898 if (!VECTOR_MODE_P (vec_mode))
3901 /* Invariant argument is needed for a vector shift
3902 by a scalar shift operand. */
3903 optab_op2_mode = insn_data[icode].operand[2].mode;
3904 if (! (VECTOR_MODE_P (optab_op2_mode)
3905 || dt[1] == vect_constant_def
3906 || dt[1] == vect_invariant_def))
3908 if (vect_print_dump_info (REPORT_DETAILS))
3909 fprintf (vect_dump, "operand mode requires invariant argument.");
3914 if (!vec_stmt) /* transformation not required. */
3916 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
3917 if (vect_print_dump_info (REPORT_DETAILS))
3918 fprintf (vect_dump, "=== vectorizable_operation ===");
3919 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3925 if (vect_print_dump_info (REPORT_DETAILS))
3926 fprintf (vect_dump, "transform binary/unary operation.");
3929 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3932 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3933 if (op_type == binary_op)
3934 vec_oprnds1 = VEC_alloc (tree, heap, 1);
3936 /* In case the vectorization factor (VF) is bigger than the number
3937 of elements that we can fit in a vectype (nunits), we have to generate
3938 more than one vector stmt - i.e - we need to "unroll" the
3939 vector stmt by a factor VF/nunits. In doing so, we record a pointer
3940 from one copy of the vector stmt to the next, in the field
3941 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
3942 stages to find the correct vector defs to be used when vectorizing
3943 stmts that use the defs of the current stmt. The example below illustrates
3944 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
3945 4 vectorized stmts):
3947 before vectorization:
3948 RELATED_STMT VEC_STMT
3952 step 1: vectorize stmt S1 (done in vectorizable_load. See more details
3954 RELATED_STMT VEC_STMT
3955 VS1_0: vx0 = memref0 VS1_1 -
3956 VS1_1: vx1 = memref1 VS1_2 -
3957 VS1_2: vx2 = memref2 VS1_3 -
3958 VS1_3: vx3 = memref3 - -
3959 S1: x = load - VS1_0
3962 step2: vectorize stmt S2 (done here):
3963 To vectorize stmt S2 we first need to find the relevant vector
3964 def for the first operand 'x'. This is, as usual, obtained from
3965 the vector stmt recorded in the STMT_VINFO_VEC_STMT of the stmt
3966 that defines 'x' (S1). This way we find the stmt VS1_0, and the
3967 relevant vector def 'vx0'. Having found 'vx0' we can generate
3968 the vector stmt VS2_0, and as usual, record it in the
3969 STMT_VINFO_VEC_STMT of stmt S2.
3970 When creating the second copy (VS2_1), we obtain the relevant vector
3971 def from the vector stmt recorded in the STMT_VINFO_RELATED_STMT of
3972 stmt VS1_0. This way we find the stmt VS1_1 and the relevant
3973 vector def 'vx1'. Using 'vx1' we create stmt VS2_1 and record a
3974 pointer to it in the STMT_VINFO_RELATED_STMT of the vector stmt VS2_0.
3975 Similarly when creating stmts VS2_2 and VS2_3. This is the resulting
3976 chain of stmts and pointers:
3977 RELATED_STMT VEC_STMT
3978 VS1_0: vx0 = memref0 VS1_1 -
3979 VS1_1: vx1 = memref1 VS1_2 -
3980 VS1_2: vx2 = memref2 VS1_3 -
3981 VS1_3: vx3 = memref3 - -
3982 S1: x = load - VS1_0
3983 VS2_0: vz0 = vx0 + v1 VS2_1 -
3984 VS2_1: vz1 = vx1 + v1 VS2_2 -
3985 VS2_2: vz2 = vx2 + v1 VS2_3 -
3986 VS2_3: vz3 = vx3 + v1 - -
3987 S2: z = x + 1 - VS2_0 */
3989 prev_stmt_info = NULL;
3990 for (j = 0; j < ncopies; j++)
3995 if (op_type == binary_op
3996 && (code == LSHIFT_EXPR || code == RSHIFT_EXPR))
3998 /* Vector shl and shr insn patterns can be defined with scalar
3999 operand 2 (shift operand). In this case, use constant or loop
4000 invariant op1 directly, without extending it to vector mode
4002 optab_op2_mode = insn_data[icode].operand[2].mode;
4003 if (!VECTOR_MODE_P (optab_op2_mode))
4005 if (vect_print_dump_info (REPORT_DETAILS))
4006 fprintf (vect_dump, "operand 1 using scalar mode.");
4008 VEC_quick_push (tree, vec_oprnds1, vec_oprnd1);
4012 /* vec_oprnd is available if operand 1 should be of a scalar-type
4013 (a special case for certain kind of vector shifts); otherwise,
4014 operand 1 should be of a vector type (the usual case). */
4015 if (op_type == binary_op && !vec_oprnd1)
4016 vect_get_vec_defs (op0, op1, stmt, &vec_oprnds0, &vec_oprnds1,
4019 vect_get_vec_defs (op0, NULL_TREE, stmt, &vec_oprnds0, NULL,
4023 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, &vec_oprnds1);
4025 /* Arguments are ready. Create the new vector stmt. */
4026 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
4028 if (op_type == binary_op)
4030 vop1 = VEC_index (tree, vec_oprnds1, i);
4031 new_stmt = build_gimple_modify_stmt (vec_dest,
4032 build2 (code, vectype, vop0, vop1));
4035 new_stmt = build_gimple_modify_stmt (vec_dest,
4036 build1 (code, vectype, vop0));
4038 new_temp = make_ssa_name (vec_dest, new_stmt);
4039 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4040 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4042 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
4046 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4048 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4049 prev_stmt_info = vinfo_for_stmt (new_stmt);
4052 VEC_free (tree, heap, vec_oprnds0);
4054 VEC_free (tree, heap, vec_oprnds1);
4060 /* Function vectorizable_type_demotion
4062 Check if STMT performs a binary or unary operation that involves
4063 type demotion, and if it can be vectorized.
4064 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4065 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4066 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4069 vectorizable_type_demotion (tree stmt, block_stmt_iterator *bsi,
4076 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4077 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4078 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4079 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4080 enum tree_code code, code1 = ERROR_MARK;
4083 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4085 stmt_vec_info prev_stmt_info;
4094 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4097 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4100 /* Is STMT a vectorizable type-demotion operation? */
4101 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4104 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4107 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4108 code = TREE_CODE (operation);
4109 if (code != NOP_EXPR && code != CONVERT_EXPR)
4112 op0 = TREE_OPERAND (operation, 0);
4113 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4116 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4118 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4119 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4122 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4123 if (nunits_in != nunits_out / 2) /* FORNOW */
4126 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
4127 gcc_assert (ncopies >= 1);
4128 /* FORNOW. This restriction should be relaxed. */
4129 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4131 if (vect_print_dump_info (REPORT_DETAILS))
4132 fprintf (vect_dump, "multiple types in nested loop.");
4136 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4137 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4138 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4139 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4140 && (code == NOP_EXPR || code == CONVERT_EXPR))))
4143 /* Check the operands of the operation. */
4144 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4146 if (vect_print_dump_info (REPORT_DETAILS))
4147 fprintf (vect_dump, "use not simple.");
4151 /* Supportable by target? */
4152 if (!supportable_narrowing_operation (code, stmt, vectype_in, &code1))
4155 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4157 if (!vec_stmt) /* transformation not required. */
4159 STMT_VINFO_TYPE (stmt_info) = type_demotion_vec_info_type;
4160 if (vect_print_dump_info (REPORT_DETAILS))
4161 fprintf (vect_dump, "=== vectorizable_demotion ===");
4162 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
4167 if (vect_print_dump_info (REPORT_DETAILS))
4168 fprintf (vect_dump, "transform type demotion operation. ncopies = %d.",
4172 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4174 /* In case the vectorization factor (VF) is bigger than the number
4175 of elements that we can fit in a vectype (nunits), we have to generate
4176 more than one vector stmt - i.e - we need to "unroll" the
4177 vector stmt by a factor VF/nunits. */
4178 prev_stmt_info = NULL;
4179 for (j = 0; j < ncopies; j++)
4184 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4185 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4189 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
4190 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4193 /* Arguments are ready. Create the new vector stmt. */
4194 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
4195 new_stmt = build_gimple_modify_stmt (vec_dest, expr);
4196 new_temp = make_ssa_name (vec_dest, new_stmt);
4197 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
4198 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4201 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4203 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4205 prev_stmt_info = vinfo_for_stmt (new_stmt);
4208 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4213 /* Function vectorizable_type_promotion
4215 Check if STMT performs a binary or unary operation that involves
4216 type promotion, and if it can be vectorized.
4217 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4218 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4219 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4222 vectorizable_type_promotion (tree stmt, block_stmt_iterator *bsi,
4228 tree op0, op1 = NULL;
4229 tree vec_oprnd0=NULL, vec_oprnd1=NULL;
4230 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4231 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4232 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4233 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
4234 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
4237 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
4239 stmt_vec_info prev_stmt_info;
4247 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4250 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4253 /* Is STMT a vectorizable type-promotion operation? */
4254 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4257 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
4260 operation = GIMPLE_STMT_OPERAND (stmt, 1);
4261 code = TREE_CODE (operation);
4262 if (code != NOP_EXPR && code != CONVERT_EXPR
4263 && code != WIDEN_MULT_EXPR)
4266 op0 = TREE_OPERAND (operation, 0);
4267 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op0));
4270 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
4272 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4273 vectype_out = get_vectype_for_scalar_type (TREE_TYPE (scalar_dest));
4276 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
4277 if (nunits_out != nunits_in / 2) /* FORNOW */
4280 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
4281 gcc_assert (ncopies >= 1);
4282 /* FORNOW. This restriction should be relaxed. */
4283 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4285 if (vect_print_dump_info (REPORT_DETAILS))
4286 fprintf (vect_dump, "multiple types in nested loop.");
4290 if (! ((INTEGRAL_TYPE_P (TREE_TYPE (scalar_dest))
4291 && INTEGRAL_TYPE_P (TREE_TYPE (op0)))
4292 || (SCALAR_FLOAT_TYPE_P (TREE_TYPE (scalar_dest))
4293 && SCALAR_FLOAT_TYPE_P (TREE_TYPE (op0))
4294 && (code == CONVERT_EXPR || code == NOP_EXPR))))
4297 /* Check the operands of the operation. */
4298 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
4300 if (vect_print_dump_info (REPORT_DETAILS))
4301 fprintf (vect_dump, "use not simple.");
4305 op_type = TREE_CODE_LENGTH (code);
4306 if (op_type == binary_op)
4308 op1 = TREE_OPERAND (operation, 1);
4309 if (!vect_is_simple_use (op1, loop_vinfo, &def_stmt, &def, &dt[1]))
4311 if (vect_print_dump_info (REPORT_DETAILS))
4312 fprintf (vect_dump, "use not simple.");
4317 /* Supportable by target? */
4318 if (!supportable_widening_operation (code, stmt, vectype_in,
4319 &decl1, &decl2, &code1, &code2))
4322 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
4324 if (!vec_stmt) /* transformation not required. */
4326 STMT_VINFO_TYPE (stmt_info) = type_promotion_vec_info_type;
4327 if (vect_print_dump_info (REPORT_DETAILS))
4328 fprintf (vect_dump, "=== vectorizable_promotion ===");
4329 vect_model_simple_cost (stmt_info, 2*ncopies, dt, NULL);
4335 if (vect_print_dump_info (REPORT_DETAILS))
4336 fprintf (vect_dump, "transform type promotion operation. ncopies = %d.",
4340 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
4342 /* In case the vectorization factor (VF) is bigger than the number
4343 of elements that we can fit in a vectype (nunits), we have to generate
4344 more than one vector stmt - i.e - we need to "unroll" the
4345 vector stmt by a factor VF/nunits. */
4347 prev_stmt_info = NULL;
4348 for (j = 0; j < ncopies; j++)
4353 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
4354 if (op_type == binary_op)
4355 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt, NULL);
4359 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
4360 if (op_type == binary_op)
4361 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd1);
4364 /* Arguments are ready. Create the new vector stmt. We are creating
4365 two vector defs because the widened result does not fit in one vector.
4366 The vectorized stmt can be expressed as a call to a taregt builtin,
4367 or a using a tree-code. */
4368 /* Generate first half of the widened result: */
4369 new_stmt = vect_gen_widened_results_half (code1, vectype_out, decl1,
4370 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4372 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
4374 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4375 prev_stmt_info = vinfo_for_stmt (new_stmt);
4377 /* Generate second half of the widened result: */
4378 new_stmt = vect_gen_widened_results_half (code2, vectype_out, decl2,
4379 vec_oprnd0, vec_oprnd1, op_type, vec_dest, bsi, stmt);
4380 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4381 prev_stmt_info = vinfo_for_stmt (new_stmt);
4385 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4390 /* Function vect_strided_store_supported.
4392 Returns TRUE is INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
4393 and FALSE otherwise. */
4396 vect_strided_store_supported (tree vectype)
4398 optab interleave_high_optab, interleave_low_optab;
4401 mode = (int) TYPE_MODE (vectype);
4403 /* Check that the operation is supported. */
4404 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
4406 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
4408 if (!interleave_high_optab || !interleave_low_optab)
4410 if (vect_print_dump_info (REPORT_DETAILS))
4411 fprintf (vect_dump, "no optab for interleave.");
4415 if (optab_handler (interleave_high_optab, mode)->insn_code
4417 || optab_handler (interleave_low_optab, mode)->insn_code
4418 == CODE_FOR_nothing)
4420 if (vect_print_dump_info (REPORT_DETAILS))
4421 fprintf (vect_dump, "interleave op not supported by target.");
4429 /* Function vect_permute_store_chain.
4431 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4432 a power of 2, generate interleave_high/low stmts to reorder the data
4433 correctly for the stores. Return the final references for stores in
4436 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4437 The input is 4 vectors each containing 8 elements. We assign a number to each
4438 element, the input sequence is:
4440 1st vec: 0 1 2 3 4 5 6 7
4441 2nd vec: 8 9 10 11 12 13 14 15
4442 3rd vec: 16 17 18 19 20 21 22 23
4443 4th vec: 24 25 26 27 28 29 30 31
4445 The output sequence should be:
4447 1st vec: 0 8 16 24 1 9 17 25
4448 2nd vec: 2 10 18 26 3 11 19 27
4449 3rd vec: 4 12 20 28 5 13 21 30
4450 4th vec: 6 14 22 30 7 15 23 31
4452 i.e., we interleave the contents of the four vectors in their order.
4454 We use interleave_high/low instructions to create such output. The input of
4455 each interleave_high/low operation is two vectors:
4458 the even elements of the result vector are obtained left-to-right from the
4459 high/low elements of the first vector. The odd elements of the result are
4460 obtained left-to-right from the high/low elements of the second vector.
4461 The output of interleave_high will be: 0 4 1 5
4462 and of interleave_low: 2 6 3 7
4465 The permutation is done in log LENGTH stages. In each stage interleave_high
4466 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4467 where the first argument is taken from the first half of DR_CHAIN and the
4468 second argument from it's second half.
4471 I1: interleave_high (1st vec, 3rd vec)
4472 I2: interleave_low (1st vec, 3rd vec)
4473 I3: interleave_high (2nd vec, 4th vec)
4474 I4: interleave_low (2nd vec, 4th vec)
4476 The output for the first stage is:
4478 I1: 0 16 1 17 2 18 3 19
4479 I2: 4 20 5 21 6 22 7 23
4480 I3: 8 24 9 25 10 26 11 27
4481 I4: 12 28 13 29 14 30 15 31
4483 The output of the second stage, i.e. the final result is:
4485 I1: 0 8 16 24 1 9 17 25
4486 I2: 2 10 18 26 3 11 19 27
4487 I3: 4 12 20 28 5 13 21 30
4488 I4: 6 14 22 30 7 15 23 31. */
4491 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4492 unsigned int length,
4494 block_stmt_iterator *bsi,
4495 VEC(tree,heap) **result_chain)
4497 tree perm_dest, perm_stmt, vect1, vect2, high, low;
4498 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4499 tree scalar_dest, tmp;
4502 VEC(tree,heap) *first, *second;
4504 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4505 first = VEC_alloc (tree, heap, length/2);
4506 second = VEC_alloc (tree, heap, length/2);
4508 /* Check that the operation is supported. */
4509 if (!vect_strided_store_supported (vectype))
4512 *result_chain = VEC_copy (tree, heap, dr_chain);
4514 for (i = 0; i < exact_log2 (length); i++)
4516 for (j = 0; j < length/2; j++)
4518 vect1 = VEC_index (tree, dr_chain, j);
4519 vect2 = VEC_index (tree, dr_chain, j+length/2);
4521 /* Create interleaving stmt:
4522 in the case of big endian:
4523 high = interleave_high (vect1, vect2)
4524 and in the case of little endian:
4525 high = interleave_low (vect1, vect2). */
4526 perm_dest = create_tmp_var (vectype, "vect_inter_high");
4527 DECL_GIMPLE_REG_P (perm_dest) = 1;
4528 add_referenced_var (perm_dest);
4529 if (BYTES_BIG_ENDIAN)
4530 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4532 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4533 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4534 high = make_ssa_name (perm_dest, perm_stmt);
4535 GIMPLE_STMT_OPERAND (perm_stmt, 0) = high;
4536 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4537 VEC_replace (tree, *result_chain, 2*j, high);
4539 /* Create interleaving stmt:
4540 in the case of big endian:
4541 low = interleave_low (vect1, vect2)
4542 and in the case of little endian:
4543 low = interleave_high (vect1, vect2). */
4544 perm_dest = create_tmp_var (vectype, "vect_inter_low");
4545 DECL_GIMPLE_REG_P (perm_dest) = 1;
4546 add_referenced_var (perm_dest);
4547 if (BYTES_BIG_ENDIAN)
4548 tmp = build2 (VEC_INTERLEAVE_LOW_EXPR, vectype, vect1, vect2);
4550 tmp = build2 (VEC_INTERLEAVE_HIGH_EXPR, vectype, vect1, vect2);
4551 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
4552 low = make_ssa_name (perm_dest, perm_stmt);
4553 GIMPLE_STMT_OPERAND (perm_stmt, 0) = low;
4554 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
4555 VEC_replace (tree, *result_chain, 2*j+1, low);
4557 dr_chain = VEC_copy (tree, heap, *result_chain);
4563 /* Function vectorizable_store.
4565 Check if STMT defines a non scalar data-ref (array/pointer/structure) that
4567 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
4568 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
4569 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
4572 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
4578 tree vec_oprnd = NULL_TREE;
4579 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4580 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr = NULL;
4581 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4582 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4583 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4584 enum machine_mode vec_mode;
4586 enum dr_alignment_support alignment_support_scheme;
4588 enum vect_def_type dt;
4589 stmt_vec_info prev_stmt_info = NULL;
4590 tree dataref_ptr = NULL_TREE;
4591 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
4592 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
4594 tree next_stmt, first_stmt = NULL_TREE;
4595 bool strided_store = false;
4596 unsigned int group_size, i;
4597 VEC(tree,heap) *dr_chain = NULL, *oprnds = NULL, *result_chain = NULL;
4599 VEC(tree,heap) *vec_oprnds = NULL;
4600 bool slp = (slp_node != NULL);
4601 stmt_vec_info first_stmt_vinfo;
4602 unsigned int vec_num;
4604 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
4605 this, so we can safely override NCOPIES with 1 here. */
4609 gcc_assert (ncopies >= 1);
4611 /* FORNOW. This restriction should be relaxed. */
4612 if (nested_in_vect_loop_p (loop, stmt) && ncopies > 1)
4614 if (vect_print_dump_info (REPORT_DETAILS))
4615 fprintf (vect_dump, "multiple types in nested loop.");
4619 if (!STMT_VINFO_RELEVANT_P (stmt_info))
4622 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
4625 /* Is vectorizable store? */
4627 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
4630 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4631 if (TREE_CODE (scalar_dest) != ARRAY_REF
4632 && TREE_CODE (scalar_dest) != INDIRECT_REF
4633 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
4636 op = GIMPLE_STMT_OPERAND (stmt, 1);
4637 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4639 if (vect_print_dump_info (REPORT_DETAILS))
4640 fprintf (vect_dump, "use not simple.");
4644 vec_mode = TYPE_MODE (vectype);
4645 /* FORNOW. In some cases can vectorize even if data-type not supported
4646 (e.g. - array initialization with 0). */
4647 if (optab_handler (mov_optab, (int)vec_mode)->insn_code == CODE_FOR_nothing)
4650 if (!STMT_VINFO_DATA_REF (stmt_info))
4653 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
4655 strided_store = true;
4656 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
4657 if (!vect_strided_store_supported (vectype)
4658 && !PURE_SLP_STMT (stmt_info) && !slp)
4661 if (first_stmt == stmt)
4663 /* STMT is the leader of the group. Check the operands of all the
4664 stmts of the group. */
4665 next_stmt = DR_GROUP_NEXT_DR (stmt_info);
4668 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4669 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
4671 if (vect_print_dump_info (REPORT_DETAILS))
4672 fprintf (vect_dump, "use not simple.");
4675 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4680 if (!vec_stmt) /* transformation not required. */
4682 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
4683 if (!PURE_SLP_STMT (stmt_info))
4684 vect_model_store_cost (stmt_info, ncopies, dt, NULL);
4692 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
4693 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
4695 DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))++;
4698 gcc_assert (!nested_in_vect_loop_p (loop, stmt));
4700 /* We vectorize all the stmts of the interleaving group when we
4701 reach the last stmt in the group. */
4702 if (DR_GROUP_STORE_COUNT (vinfo_for_stmt (first_stmt))
4703 < DR_GROUP_SIZE (vinfo_for_stmt (first_stmt))
4706 *vec_stmt = NULL_TREE;
4711 strided_store = false;
4713 /* VEC_NUM is the number of vect stmts to be created for this group. */
4714 if (slp && SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) < group_size)
4715 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4717 vec_num = group_size;
4723 group_size = vec_num = 1;
4724 first_stmt_vinfo = stmt_info;
4727 if (vect_print_dump_info (REPORT_DETAILS))
4728 fprintf (vect_dump, "transform store. ncopies = %d",ncopies);
4730 dr_chain = VEC_alloc (tree, heap, group_size);
4731 oprnds = VEC_alloc (tree, heap, group_size);
4733 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
4734 gcc_assert (alignment_support_scheme);
4735 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
4737 /* In case the vectorization factor (VF) is bigger than the number
4738 of elements that we can fit in a vectype (nunits), we have to generate
4739 more than one vector stmt - i.e - we need to "unroll" the
4740 vector stmt by a factor VF/nunits. For more details see documentation in
4741 vect_get_vec_def_for_copy_stmt. */
4743 /* In case of interleaving (non-unit strided access):
4750 We create vectorized stores starting from base address (the access of the
4751 first stmt in the chain (S2 in the above example), when the last store stmt
4752 of the chain (S4) is reached:
4755 VS2: &base + vec_size*1 = vx0
4756 VS3: &base + vec_size*2 = vx1
4757 VS4: &base + vec_size*3 = vx3
4759 Then permutation statements are generated:
4761 VS5: vx5 = VEC_INTERLEAVE_HIGH_EXPR < vx0, vx3 >
4762 VS6: vx6 = VEC_INTERLEAVE_LOW_EXPR < vx0, vx3 >
4765 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
4766 (the order of the data-refs in the output of vect_permute_store_chain
4767 corresponds to the order of scalar stmts in the interleaving chain - see
4768 the documentation of vect_permute_store_chain()).
4770 In case of both multiple types and interleaving, above vector stores and
4771 permutation stmts are created for every copy. The result vector stmts are
4772 put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
4773 STMT_VINFO_RELATED_STMT for the next copies.
4776 prev_stmt_info = NULL;
4777 for (j = 0; j < ncopies; j++)
4786 /* Get vectorized arguments for SLP_NODE. */
4787 vect_get_slp_defs (slp_node, &vec_oprnds, NULL);
4789 vec_oprnd = VEC_index (tree, vec_oprnds, 0);
4793 /* For interleaved stores we collect vectorized defs for all the
4794 stores in the group in DR_CHAIN and OPRNDS. DR_CHAIN is then
4795 used as an input to vect_permute_store_chain(), and OPRNDS as
4796 an input to vect_get_vec_def_for_stmt_copy() for the next copy.
4798 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4799 OPRNDS are of size 1. */
4800 next_stmt = first_stmt;
4801 for (i = 0; i < group_size; i++)
4803 /* Since gaps are not supported for interleaved stores,
4804 GROUP_SIZE is the exact number of stmts in the chain.
4805 Therefore, NEXT_STMT can't be NULL_TREE. In case that
4806 there is no interleaving, GROUP_SIZE is 1, and only one
4807 iteration of the loop will be executed. */
4808 gcc_assert (next_stmt);
4809 op = GIMPLE_STMT_OPERAND (next_stmt, 1);
4811 vec_oprnd = vect_get_vec_def_for_operand (op, next_stmt,
4813 VEC_quick_push(tree, dr_chain, vec_oprnd);
4814 VEC_quick_push(tree, oprnds, vec_oprnd);
4815 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4818 dataref_ptr = vect_create_data_ref_ptr (first_stmt, NULL, NULL_TREE,
4819 &dummy, &ptr_incr, false,
4820 TREE_TYPE (vec_oprnd), &inv_p);
4821 gcc_assert (!inv_p);
4825 /* FORNOW SLP doesn't work for multiple types. */
4828 /* For interleaved stores we created vectorized defs for all the
4829 defs stored in OPRNDS in the previous iteration (previous copy).
4830 DR_CHAIN is then used as an input to vect_permute_store_chain(),
4831 and OPRNDS as an input to vect_get_vec_def_for_stmt_copy() for the
4833 If the store is not strided, GROUP_SIZE is 1, and DR_CHAIN and
4834 OPRNDS are of size 1. */
4835 for (i = 0; i < group_size; i++)
4837 op = VEC_index (tree, oprnds, i);
4838 vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
4839 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt, op);
4840 VEC_replace(tree, dr_chain, i, vec_oprnd);
4841 VEC_replace(tree, oprnds, i, vec_oprnd);
4844 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
4849 result_chain = VEC_alloc (tree, heap, group_size);
4851 if (!vect_permute_store_chain (dr_chain, group_size, stmt, bsi,
4856 next_stmt = first_stmt;
4857 for (i = 0; i < vec_num; i++)
4860 /* Bump the vector pointer. */
4861 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
4865 vec_oprnd = VEC_index (tree, vec_oprnds, i);
4866 else if (strided_store)
4867 /* For strided stores vectorized defs are interleaved in
4868 vect_permute_store_chain(). */
4869 vec_oprnd = VEC_index (tree, result_chain, i);
4871 data_ref = build_fold_indirect_ref (dataref_ptr);
4872 /* Arguments are ready. Create the new vector stmt. */
4873 new_stmt = build_gimple_modify_stmt (data_ref, vec_oprnd);
4874 vect_finish_stmt_generation (stmt, new_stmt, bsi);
4875 mark_symbols_for_renaming (new_stmt);
4878 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
4880 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
4882 prev_stmt_info = vinfo_for_stmt (new_stmt);
4883 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
4893 /* Function vect_setup_realignment
4895 This function is called when vectorizing an unaligned load using
4896 the dr_explicit_realign[_optimized] scheme.
4897 This function generates the following code at the loop prolog:
4900 x msq_init = *(floor(p)); # prolog load
4901 realignment_token = call target_builtin;
4903 x msq = phi (msq_init, ---)
4905 The stmts marked with x are generated only for the case of
4906 dr_explicit_realign_optimized.
4908 The code above sets up a new (vector) pointer, pointing to the first
4909 location accessed by STMT, and a "floor-aligned" load using that pointer.
4910 It also generates code to compute the "realignment-token" (if the relevant
4911 target hook was defined), and creates a phi-node at the loop-header bb
4912 whose arguments are the result of the prolog-load (created by this
4913 function) and the result of a load that takes place in the loop (to be
4914 created by the caller to this function).
4916 For the case of dr_explicit_realign_optimized:
4917 The caller to this function uses the phi-result (msq) to create the
4918 realignment code inside the loop, and sets up the missing phi argument,
4921 msq = phi (msq_init, lsq)
4922 lsq = *(floor(p')); # load in loop
4923 result = realign_load (msq, lsq, realignment_token);
4925 For the case of dr_explicit_realign:
4927 msq = *(floor(p)); # load in loop
4929 lsq = *(floor(p')); # load in loop
4930 result = realign_load (msq, lsq, realignment_token);
4933 STMT - (scalar) load stmt to be vectorized. This load accesses
4934 a memory location that may be unaligned.
4935 BSI - place where new code is to be inserted.
4936 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4940 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4941 target hook, if defined.
4942 Return value - the result of the loop-header phi node. */
4945 vect_setup_realignment (tree stmt, block_stmt_iterator *bsi,
4946 tree *realignment_token,
4947 enum dr_alignment_support alignment_support_scheme,
4949 struct loop **at_loop)
4951 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4952 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4953 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4954 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4956 tree scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
4963 tree msq_init = NULL_TREE;
4966 tree msq = NULL_TREE;
4967 tree stmts = NULL_TREE;
4969 bool compute_in_loop = false;
4970 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4971 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
4972 struct loop *loop_for_initial_load;
4974 gcc_assert (alignment_support_scheme == dr_explicit_realign
4975 || alignment_support_scheme == dr_explicit_realign_optimized);
4977 /* We need to generate three things:
4978 1. the misalignment computation
4979 2. the extra vector load (for the optimized realignment scheme).
4980 3. the phi node for the two vectors from which the realignment is
4981 done (for the optimized realignment scheme).
4984 /* 1. Determine where to generate the misalignment computation.
4986 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4987 calculation will be generated by this function, outside the loop (in the
4988 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4989 caller, inside the loop.
4991 Background: If the misalignment remains fixed throughout the iterations of
4992 the loop, then both realignment schemes are applicable, and also the
4993 misalignment computation can be done outside LOOP. This is because we are
4994 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4995 are a multiple of VS (the Vector Size), and therefore the misalignment in
4996 different vectorized LOOP iterations is always the same.
4997 The problem arises only if the memory access is in an inner-loop nested
4998 inside LOOP, which is now being vectorized using outer-loop vectorization.
4999 This is the only case when the misalignment of the memory access may not
5000 remain fixed throughout the iterations of the inner-loop (as explained in
5001 detail in vect_supportable_dr_alignment). In this case, not only is the
5002 optimized realignment scheme not applicable, but also the misalignment
5003 computation (and generation of the realignment token that is passed to
5004 REALIGN_LOAD) have to be done inside the loop.
5006 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
5007 or not, which in turn determines if the misalignment is computed inside
5008 the inner-loop, or outside LOOP. */
5010 if (init_addr != NULL_TREE)
5012 compute_in_loop = true;
5013 gcc_assert (alignment_support_scheme == dr_explicit_realign);
5017 /* 2. Determine where to generate the extra vector load.
5019 For the optimized realignment scheme, instead of generating two vector
5020 loads in each iteration, we generate a single extra vector load in the
5021 preheader of the loop, and in each iteration reuse the result of the
5022 vector load from the previous iteration. In case the memory access is in
5023 an inner-loop nested inside LOOP, which is now being vectorized using
5024 outer-loop vectorization, we need to determine whether this initial vector
5025 load should be generated at the preheader of the inner-loop, or can be
5026 generated at the preheader of LOOP. If the memory access has no evolution
5027 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5028 to be generated inside LOOP (in the preheader of the inner-loop). */
5030 if (nested_in_vect_loop)
5032 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
5033 bool invariant_in_outerloop =
5034 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
5035 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5038 loop_for_initial_load = loop;
5040 *at_loop = loop_for_initial_load;
5042 /* 3. For the case of the optimized realignment, create the first vector
5043 load at the loop preheader. */
5045 if (alignment_support_scheme == dr_explicit_realign_optimized)
5047 /* Create msq_init = *(floor(p1)) in the loop preheader */
5049 gcc_assert (!compute_in_loop);
5050 pe = loop_preheader_edge (loop_for_initial_load);
5051 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5052 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
5053 &init_addr, &inc, true, NULL_TREE, &inv_p);
5054 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5055 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5056 new_temp = make_ssa_name (vec_dest, new_stmt);
5057 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5058 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5059 gcc_assert (!new_bb);
5060 msq_init = GIMPLE_STMT_OPERAND (new_stmt, 0);
5063 /* 4. Create realignment token using a target builtin, if available.
5064 It is done either inside the containing loop, or before LOOP (as
5065 determined above). */
5067 if (targetm.vectorize.builtin_mask_for_load)
5071 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
5072 if (compute_in_loop)
5073 gcc_assert (init_addr); /* already computed by the caller. */
5076 /* Generate the INIT_ADDR computation outside LOOP. */
5077 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
5079 pe = loop_preheader_edge (loop);
5080 new_bb = bsi_insert_on_edge_immediate (pe, stmts);
5081 gcc_assert (!new_bb);
5084 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5085 new_stmt = build_call_expr (builtin_decl, 1, init_addr);
5086 vec_dest = vect_create_destination_var (scalar_dest,
5087 TREE_TYPE (new_stmt));
5088 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5089 new_temp = make_ssa_name (vec_dest, new_stmt);
5090 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5092 if (compute_in_loop)
5093 bsi_insert_before (bsi, new_stmt, BSI_SAME_STMT);
5096 /* Generate the misalignment computation outside LOOP. */
5097 pe = loop_preheader_edge (loop);
5098 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
5099 gcc_assert (!new_bb);
5102 *realignment_token = GIMPLE_STMT_OPERAND (new_stmt, 0);
5104 /* The result of the CALL_EXPR to this builtin is determined from
5105 the value of the parameter and no global variables are touched
5106 which makes the builtin a "const" function. Requiring the
5107 builtin to have the "const" attribute makes it unnecessary
5108 to call mark_call_clobbered. */
5109 gcc_assert (TREE_READONLY (builtin_decl));
5112 if (alignment_support_scheme == dr_explicit_realign)
5115 gcc_assert (!compute_in_loop);
5116 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5119 /* 5. Create msq = phi <msq_init, lsq> in loop */
5121 pe = loop_preheader_edge (containing_loop);
5122 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5123 msq = make_ssa_name (vec_dest, NULL_TREE);
5124 phi_stmt = create_phi_node (msq, containing_loop->header);
5125 SSA_NAME_DEF_STMT (msq) = phi_stmt;
5126 add_phi_arg (phi_stmt, msq_init, pe);
5132 /* Function vect_strided_load_supported.
5134 Returns TRUE is EXTRACT_EVEN and EXTRACT_ODD operations are supported,
5135 and FALSE otherwise. */
5138 vect_strided_load_supported (tree vectype)
5140 optab perm_even_optab, perm_odd_optab;
5143 mode = (int) TYPE_MODE (vectype);
5145 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype);
5146 if (!perm_even_optab)
5148 if (vect_print_dump_info (REPORT_DETAILS))
5149 fprintf (vect_dump, "no optab for perm_even.");
5153 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
5155 if (vect_print_dump_info (REPORT_DETAILS))
5156 fprintf (vect_dump, "perm_even op not supported by target.");
5160 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype);
5161 if (!perm_odd_optab)
5163 if (vect_print_dump_info (REPORT_DETAILS))
5164 fprintf (vect_dump, "no optab for perm_odd.");
5168 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
5170 if (vect_print_dump_info (REPORT_DETAILS))
5171 fprintf (vect_dump, "perm_odd op not supported by target.");
5178 /* Function vect_permute_load_chain.
5180 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
5181 a power of 2, generate extract_even/odd stmts to reorder the input data
5182 correctly. Return the final references for loads in RESULT_CHAIN.
5184 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5185 The input is 4 vectors each containing 8 elements. We assign a number to each
5186 element, the input sequence is:
5188 1st vec: 0 1 2 3 4 5 6 7
5189 2nd vec: 8 9 10 11 12 13 14 15
5190 3rd vec: 16 17 18 19 20 21 22 23
5191 4th vec: 24 25 26 27 28 29 30 31
5193 The output sequence should be:
5195 1st vec: 0 4 8 12 16 20 24 28
5196 2nd vec: 1 5 9 13 17 21 25 29
5197 3rd vec: 2 6 10 14 18 22 26 30
5198 4th vec: 3 7 11 15 19 23 27 31
5200 i.e., the first output vector should contain the first elements of each
5201 interleaving group, etc.
5203 We use extract_even/odd instructions to create such output. The input of each
5204 extract_even/odd operation is two vectors
5208 and the output is the vector of extracted even/odd elements. The output of
5209 extract_even will be: 0 2 4 6
5210 and of extract_odd: 1 3 5 7
5213 The permutation is done in log LENGTH stages. In each stage extract_even and
5214 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
5215 order. In our example,
5217 E1: extract_even (1st vec, 2nd vec)
5218 E2: extract_odd (1st vec, 2nd vec)
5219 E3: extract_even (3rd vec, 4th vec)
5220 E4: extract_odd (3rd vec, 4th vec)
5222 The output for the first stage will be:
5224 E1: 0 2 4 6 8 10 12 14
5225 E2: 1 3 5 7 9 11 13 15
5226 E3: 16 18 20 22 24 26 28 30
5227 E4: 17 19 21 23 25 27 29 31
5229 In order to proceed and create the correct sequence for the next stage (or
5230 for the correct output, if the second stage is the last one, as in our
5231 example), we first put the output of extract_even operation and then the
5232 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5233 The input for the second stage is:
5235 1st vec (E1): 0 2 4 6 8 10 12 14
5236 2nd vec (E3): 16 18 20 22 24 26 28 30
5237 3rd vec (E2): 1 3 5 7 9 11 13 15
5238 4th vec (E4): 17 19 21 23 25 27 29 31
5240 The output of the second stage:
5242 E1: 0 4 8 12 16 20 24 28
5243 E2: 2 6 10 14 18 22 26 30
5244 E3: 1 5 9 13 17 21 25 29
5245 E4: 3 7 11 15 19 23 27 31
5247 And RESULT_CHAIN after reordering:
5249 1st vec (E1): 0 4 8 12 16 20 24 28
5250 2nd vec (E3): 1 5 9 13 17 21 25 29
5251 3rd vec (E2): 2 6 10 14 18 22 26 30
5252 4th vec (E4): 3 7 11 15 19 23 27 31. */
5255 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
5256 unsigned int length,
5258 block_stmt_iterator *bsi,
5259 VEC(tree,heap) **result_chain)
5261 tree perm_dest, perm_stmt, data_ref, first_vect, second_vect;
5262 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
5267 /* Check that the operation is supported. */
5268 if (!vect_strided_load_supported (vectype))
5271 *result_chain = VEC_copy (tree, heap, dr_chain);
5272 for (i = 0; i < exact_log2 (length); i++)
5274 for (j = 0; j < length; j +=2)
5276 first_vect = VEC_index (tree, dr_chain, j);
5277 second_vect = VEC_index (tree, dr_chain, j+1);
5279 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5280 perm_dest = create_tmp_var (vectype, "vect_perm_even");
5281 DECL_GIMPLE_REG_P (perm_dest) = 1;
5282 add_referenced_var (perm_dest);
5284 tmp = build2 (VEC_EXTRACT_EVEN_EXPR, vectype,
5285 first_vect, second_vect);
5286 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5288 data_ref = make_ssa_name (perm_dest, perm_stmt);
5289 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5290 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5291 mark_symbols_for_renaming (perm_stmt);
5293 VEC_replace (tree, *result_chain, j/2, data_ref);
5295 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5296 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
5297 DECL_GIMPLE_REG_P (perm_dest) = 1;
5298 add_referenced_var (perm_dest);
5300 tmp = build2 (VEC_EXTRACT_ODD_EXPR, vectype,
5301 first_vect, second_vect);
5302 perm_stmt = build_gimple_modify_stmt (perm_dest, tmp);
5303 data_ref = make_ssa_name (perm_dest, perm_stmt);
5304 GIMPLE_STMT_OPERAND (perm_stmt, 0) = data_ref;
5305 vect_finish_stmt_generation (stmt, perm_stmt, bsi);
5306 mark_symbols_for_renaming (perm_stmt);
5308 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
5310 dr_chain = VEC_copy (tree, heap, *result_chain);
5316 /* Function vect_transform_strided_load.
5318 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5319 to perform their permutation and ascribe the result vectorized statements to
5320 the scalar statements.
5324 vect_transform_strided_load (tree stmt, VEC(tree,heap) *dr_chain, int size,
5325 block_stmt_iterator *bsi)
5327 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5328 tree first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5329 tree next_stmt, new_stmt;
5330 VEC(tree,heap) *result_chain = NULL;
5331 unsigned int i, gap_count;
5334 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5335 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5336 vectors, that are ready for vector computation. */
5337 result_chain = VEC_alloc (tree, heap, size);
5339 if (!vect_permute_load_chain (dr_chain, size, stmt, bsi, &result_chain))
5342 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5343 Since we scan the chain starting from it's first node, their order
5344 corresponds the order of data-refs in RESULT_CHAIN. */
5345 next_stmt = first_stmt;
5347 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
5352 /* Skip the gaps. Loads created for the gaps will be removed by dead
5353 code elimination pass later.
5354 DR_GROUP_GAP is the number of steps in elements from the previous
5355 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
5356 correspond to the gaps.
5358 if (gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
5366 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5367 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5368 copies, and we put the new vector statement in the first available
5370 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5371 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5374 tree prev_stmt = STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5375 tree rel_stmt = STMT_VINFO_RELATED_STMT (
5376 vinfo_for_stmt (prev_stmt));
5379 prev_stmt = rel_stmt;
5380 rel_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5382 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) = new_stmt;
5384 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
5386 /* If NEXT_STMT accesses the same DR as the previous statement,
5387 put the same TMP_DATA_REF as its vectorized statement; otherwise
5388 get the next data-ref from RESULT_CHAIN. */
5389 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5397 /* vectorizable_load.
5399 Check if STMT reads a non scalar data-ref (array/pointer/structure) that
5401 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5402 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
5403 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5406 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt,
5410 tree vec_dest = NULL;
5411 tree data_ref = NULL;
5413 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5414 stmt_vec_info prev_stmt_info;
5415 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5416 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5417 struct loop *containing_loop = (bb_for_stmt (stmt))->loop_father;
5418 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
5419 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
5420 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5423 tree new_stmt = NULL_TREE;
5425 enum dr_alignment_support alignment_support_scheme;
5426 tree dataref_ptr = NULL_TREE;
5428 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5429 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5430 int i, j, group_size;
5431 tree msq = NULL_TREE, lsq;
5432 tree offset = NULL_TREE;
5433 tree realignment_token = NULL_TREE;
5434 tree phi = NULL_TREE;
5435 VEC(tree,heap) *dr_chain = NULL;
5436 bool strided_load = false;
5440 bool compute_in_loop = false;
5441 struct loop *at_loop;
5443 bool slp = (slp_node != NULL);
5445 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
5446 this, so we can safely override NCOPIES with 1 here. */
5450 gcc_assert (ncopies >= 1);
5452 /* FORNOW. This restriction should be relaxed. */
5453 if (nested_in_vect_loop && ncopies > 1)
5455 if (vect_print_dump_info (REPORT_DETAILS))
5456 fprintf (vect_dump, "multiple types in nested loop.");
5460 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5463 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5466 /* Is vectorizable load? */
5467 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5470 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
5471 if (TREE_CODE (scalar_dest) != SSA_NAME)
5474 op = GIMPLE_STMT_OPERAND (stmt, 1);
5475 if (TREE_CODE (op) != ARRAY_REF
5476 && TREE_CODE (op) != INDIRECT_REF
5477 && !STMT_VINFO_STRIDED_ACCESS (stmt_info))
5480 if (!STMT_VINFO_DATA_REF (stmt_info))
5483 scalar_type = TREE_TYPE (DR_REF (dr));
5484 mode = (int) TYPE_MODE (vectype);
5486 /* FORNOW. In some cases can vectorize even if data-type not supported
5487 (e.g. - data copies). */
5488 if (optab_handler (mov_optab, mode)->insn_code == CODE_FOR_nothing)
5490 if (vect_print_dump_info (REPORT_DETAILS))
5491 fprintf (vect_dump, "Aligned load, but unsupported type.");
5495 /* Check if the load is a part of an interleaving chain. */
5496 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
5498 strided_load = true;
5500 gcc_assert (! nested_in_vect_loop);
5502 /* Check if interleaving is supported. */
5503 if (!vect_strided_load_supported (vectype)
5504 && !PURE_SLP_STMT (stmt_info) && !slp)
5508 if (!vec_stmt) /* transformation not required. */
5510 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
5511 vect_model_load_cost (stmt_info, ncopies, NULL);
5515 if (vect_print_dump_info (REPORT_DETAILS))
5516 fprintf (vect_dump, "transform load.");
5522 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
5523 /* Check if the chain of loads is already vectorized. */
5524 if (STMT_VINFO_VEC_STMT (vinfo_for_stmt (first_stmt)))
5526 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5529 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
5530 group_size = DR_GROUP_SIZE (vinfo_for_stmt (first_stmt));
5531 dr_chain = VEC_alloc (tree, heap, group_size);
5533 /* VEC_NUM is the number of vect stmts to be created for this group. */
5536 strided_load = false;
5537 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
5540 vec_num = group_size;
5546 group_size = vec_num = 1;
5549 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
5550 gcc_assert (alignment_support_scheme);
5552 /* In case the vectorization factor (VF) is bigger than the number
5553 of elements that we can fit in a vectype (nunits), we have to generate
5554 more than one vector stmt - i.e - we need to "unroll" the
5555 vector stmt by a factor VF/nunits. In doing so, we record a pointer
5556 from one copy of the vector stmt to the next, in the field
5557 STMT_VINFO_RELATED_STMT. This is necessary in order to allow following
5558 stages to find the correct vector defs to be used when vectorizing
5559 stmts that use the defs of the current stmt. The example below illustrates
5560 the vectorization process when VF=16 and nunits=4 (i.e - we need to create
5561 4 vectorized stmts):
5563 before vectorization:
5564 RELATED_STMT VEC_STMT
5568 step 1: vectorize stmt S1:
5569 We first create the vector stmt VS1_0, and, as usual, record a
5570 pointer to it in the STMT_VINFO_VEC_STMT of the scalar stmt S1.
5571 Next, we create the vector stmt VS1_1, and record a pointer to
5572 it in the STMT_VINFO_RELATED_STMT of the vector stmt VS1_0.
5573 Similarly, for VS1_2 and VS1_3. This is the resulting chain of
5575 RELATED_STMT VEC_STMT
5576 VS1_0: vx0 = memref0 VS1_1 -
5577 VS1_1: vx1 = memref1 VS1_2 -
5578 VS1_2: vx2 = memref2 VS1_3 -
5579 VS1_3: vx3 = memref3 - -
5580 S1: x = load - VS1_0
5583 See in documentation in vect_get_vec_def_for_stmt_copy for how the
5584 information we recorded in RELATED_STMT field is used to vectorize
5587 /* In case of interleaving (non-unit strided access):
5594 Vectorized loads are created in the order of memory accesses
5595 starting from the access of the first stmt of the chain:
5598 VS2: vx1 = &base + vec_size*1
5599 VS3: vx3 = &base + vec_size*2
5600 VS4: vx4 = &base + vec_size*3
5602 Then permutation statements are generated:
5604 VS5: vx5 = VEC_EXTRACT_EVEN_EXPR < vx0, vx1 >
5605 VS6: vx6 = VEC_EXTRACT_ODD_EXPR < vx0, vx1 >
5608 And they are put in STMT_VINFO_VEC_STMT of the corresponding scalar stmts
5609 (the order of the data-refs in the output of vect_permute_load_chain
5610 corresponds to the order of scalar stmts in the interleaving chain - see
5611 the documentation of vect_permute_load_chain()).
5612 The generation of permutation stmts and recording them in
5613 STMT_VINFO_VEC_STMT is done in vect_transform_strided_load().
5615 In case of both multiple types and interleaving, the vector loads and
5616 permutation stmts above are created for every copy. The result vector stmts
5617 are put in STMT_VINFO_VEC_STMT for the first copy and in the corresponding
5618 STMT_VINFO_RELATED_STMT for the next copies. */
5620 /* If the data reference is aligned (dr_aligned) or potentially unaligned
5621 on a target that supports unaligned accesses (dr_unaligned_supported)
5622 we generate the following code:
5626 p = p + indx * vectype_size;
5631 Otherwise, the data reference is potentially unaligned on a target that
5632 does not support unaligned accesses (dr_explicit_realign_optimized) -
5633 then generate the following code, in which the data in each iteration is
5634 obtained by two vector loads, one from the previous iteration, and one
5635 from the current iteration:
5637 msq_init = *(floor(p1))
5638 p2 = initial_addr + VS - 1;
5639 realignment_token = call target_builtin;
5642 p2 = p2 + indx * vectype_size
5644 vec_dest = realign_load (msq, lsq, realignment_token)
5649 /* If the misalignment remains the same throughout the execution of the
5650 loop, we can create the init_addr and permutation mask at the loop
5651 preheader. Otherwise, it needs to be created inside the loop.
5652 This can only occur when vectorizing memory accesses in the inner-loop
5653 nested within an outer-loop that is being vectorized. */
5655 if (nested_in_vect_loop_p (loop, stmt)
5656 && (TREE_INT_CST_LOW (DR_STEP (dr)) % UNITS_PER_SIMD_WORD != 0))
5658 gcc_assert (alignment_support_scheme != dr_explicit_realign_optimized);
5659 compute_in_loop = true;
5662 if ((alignment_support_scheme == dr_explicit_realign_optimized
5663 || alignment_support_scheme == dr_explicit_realign)
5664 && !compute_in_loop)
5666 msq = vect_setup_realignment (first_stmt, bsi, &realignment_token,
5667 alignment_support_scheme, NULL_TREE,
5669 if (alignment_support_scheme == dr_explicit_realign_optimized)
5671 phi = SSA_NAME_DEF_STMT (msq);
5672 offset = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5678 prev_stmt_info = NULL;
5679 for (j = 0; j < ncopies; j++)
5681 /* 1. Create the vector pointer update chain. */
5683 dataref_ptr = vect_create_data_ref_ptr (first_stmt,
5685 &dummy, &ptr_incr, false,
5689 bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt, NULL_TREE);
5691 for (i = 0; i < vec_num; i++)
5694 dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, bsi, stmt,
5697 /* 2. Create the vector-load in the loop. */
5698 switch (alignment_support_scheme)
5701 gcc_assert (aligned_access_p (first_dr));
5702 data_ref = build_fold_indirect_ref (dataref_ptr);
5704 case dr_unaligned_supported:
5706 int mis = DR_MISALIGNMENT (first_dr);
5707 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
5709 tmis = size_binop (MULT_EXPR, tmis, size_int(BITS_PER_UNIT));
5711 build2 (MISALIGNED_INDIRECT_REF, vectype, dataref_ptr, tmis);
5714 case dr_explicit_realign:
5717 tree vs_minus_1 = size_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
5719 if (compute_in_loop)
5720 msq = vect_setup_realignment (first_stmt, bsi,
5722 dr_explicit_realign,
5725 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5726 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5727 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5728 new_temp = make_ssa_name (vec_dest, new_stmt);
5729 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5730 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5731 copy_virtual_operands (new_stmt, stmt);
5732 mark_symbols_for_renaming (new_stmt);
5735 bump = size_binop (MULT_EXPR, vs_minus_1,
5736 TYPE_SIZE_UNIT (scalar_type));
5737 ptr = bump_vector_ptr (dataref_ptr, NULL_TREE, bsi, stmt, bump);
5738 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
5741 case dr_explicit_realign_optimized:
5742 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
5747 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5748 new_stmt = build_gimple_modify_stmt (vec_dest, data_ref);
5749 new_temp = make_ssa_name (vec_dest, new_stmt);
5750 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5751 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5752 mark_symbols_for_renaming (new_stmt);
5754 /* 3. Handle explicit realignment if necessary/supported. Create in
5755 loop: vec_dest = realign_load (msq, lsq, realignment_token) */
5756 if (alignment_support_scheme == dr_explicit_realign_optimized
5757 || alignment_support_scheme == dr_explicit_realign)
5759 lsq = GIMPLE_STMT_OPERAND (new_stmt, 0);
5760 if (!realignment_token)
5761 realignment_token = dataref_ptr;
5762 vec_dest = vect_create_destination_var (scalar_dest, vectype);
5763 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq,
5765 new_stmt = build_gimple_modify_stmt (vec_dest, new_stmt);
5766 new_temp = make_ssa_name (vec_dest, new_stmt);
5767 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5768 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5770 if (alignment_support_scheme == dr_explicit_realign_optimized)
5772 if (i == vec_num - 1 && j == ncopies - 1)
5773 add_phi_arg (phi, lsq, loop_latch_edge (containing_loop));
5778 /* 4. Handle invariant-load. */
5781 gcc_assert (!strided_load);
5782 gcc_assert (nested_in_vect_loop_p (loop, stmt));
5787 tree vec_inv, bitpos, bitsize = TYPE_SIZE (scalar_type);
5789 /* CHECKME: bitpos depends on endianess? */
5790 bitpos = bitsize_zero_node;
5791 vec_inv = build3 (BIT_FIELD_REF, scalar_type, new_temp,
5793 BIT_FIELD_REF_UNSIGNED (vec_inv) =
5794 TYPE_UNSIGNED (scalar_type);
5796 vect_create_destination_var (scalar_dest, NULL_TREE);
5797 new_stmt = build_gimple_modify_stmt (vec_dest, vec_inv);
5798 new_temp = make_ssa_name (vec_dest, new_stmt);
5799 GIMPLE_STMT_OPERAND (new_stmt, 0) = new_temp;
5800 vect_finish_stmt_generation (stmt, new_stmt, bsi);
5802 for (k = nunits - 1; k >= 0; --k)
5803 t = tree_cons (NULL_TREE, new_temp, t);
5804 /* FIXME: use build_constructor directly. */
5805 vec_inv = build_constructor_from_list (vectype, t);
5806 new_temp = vect_init_vector (stmt, vec_inv, vectype, bsi);
5807 new_stmt = SSA_NAME_DEF_STMT (new_temp);
5810 gcc_unreachable (); /* FORNOW. */
5813 /* Collect vector loads and later create their permutation in
5814 vect_transform_strided_load (). */
5816 VEC_quick_push (tree, dr_chain, new_temp);
5818 /* Store vector loads in the corresponding SLP_NODE. */
5820 VEC_quick_push (tree, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
5823 /* FORNOW: SLP with multiple types is unsupported. */
5829 if (!vect_transform_strided_load (stmt, dr_chain, group_size, bsi))
5831 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
5832 dr_chain = VEC_alloc (tree, heap, group_size);
5837 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
5839 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
5840 prev_stmt_info = vinfo_for_stmt (new_stmt);
5848 /* Function vectorizable_live_operation.
5850 STMT computes a value that is used outside the loop. Check if
5851 it can be supported. */
5854 vectorizable_live_operation (tree stmt,
5855 block_stmt_iterator *bsi ATTRIBUTE_UNUSED,
5856 tree *vec_stmt ATTRIBUTE_UNUSED)
5859 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5860 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5861 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5866 enum vect_def_type dt;
5868 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
5870 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
5873 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
5876 if (TREE_CODE (GIMPLE_STMT_OPERAND (stmt, 0)) != SSA_NAME)
5879 /* FORNOW. CHECKME. */
5880 if (nested_in_vect_loop_p (loop, stmt))
5883 operation = GIMPLE_STMT_OPERAND (stmt, 1);
5884 op_type = TREE_OPERAND_LENGTH (operation);
5886 /* FORNOW: support only if all uses are invariant. This means
5887 that the scalar operations can remain in place, unvectorized.
5888 The original last scalar value that they compute will be used. */
5890 for (i = 0; i < op_type; i++)
5892 op = TREE_OPERAND (operation, i);
5893 if (op && !vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt))
5895 if (vect_print_dump_info (REPORT_DETAILS))
5896 fprintf (vect_dump, "use not simple.");
5900 if (dt != vect_invariant_def && dt != vect_constant_def)
5904 /* No transformation is required for the cases we currently support. */
5909 /* Function vect_is_simple_cond.
5912 LOOP - the loop that is being vectorized.
5913 COND - Condition that is checked for simple use.
5915 Returns whether a COND can be vectorized. Checks whether
5916 condition operands are supportable using vec_is_simple_use. */
5919 vect_is_simple_cond (tree cond, loop_vec_info loop_vinfo)
5923 enum vect_def_type dt;
5925 if (!COMPARISON_CLASS_P (cond))
5928 lhs = TREE_OPERAND (cond, 0);
5929 rhs = TREE_OPERAND (cond, 1);
5931 if (TREE_CODE (lhs) == SSA_NAME)
5933 tree lhs_def_stmt = SSA_NAME_DEF_STMT (lhs);
5934 if (!vect_is_simple_use (lhs, loop_vinfo, &lhs_def_stmt, &def, &dt))
5937 else if (TREE_CODE (lhs) != INTEGER_CST && TREE_CODE (lhs) != REAL_CST
5938 && TREE_CODE (lhs) != FIXED_CST)
5941 if (TREE_CODE (rhs) == SSA_NAME)
5943 tree rhs_def_stmt = SSA_NAME_DEF_STMT (rhs);
5944 if (!vect_is_simple_use (rhs, loop_vinfo, &rhs_def_stmt, &def, &dt))
5947 else if (TREE_CODE (rhs) != INTEGER_CST && TREE_CODE (rhs) != REAL_CST
5948 && TREE_CODE (rhs) != FIXED_CST)
5954 /* vectorizable_condition.
5956 Check if STMT is conditional modify expression that can be vectorized.
5957 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
5958 stmt using VEC_COND_EXPR to replace it, put it in VEC_STMT, and insert it
5961 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
5964 vectorizable_condition (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
5966 tree scalar_dest = NULL_TREE;
5967 tree vec_dest = NULL_TREE;
5968 tree op = NULL_TREE;
5969 tree cond_expr, then_clause, else_clause;
5970 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5971 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5972 tree vec_cond_lhs, vec_cond_rhs, vec_then_clause, vec_else_clause;
5973 tree vec_compare, vec_cond_expr;
5975 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5976 enum machine_mode vec_mode;
5978 enum vect_def_type dt;
5979 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
5980 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
5982 gcc_assert (ncopies >= 1);
5984 return false; /* FORNOW */
5986 if (!STMT_VINFO_RELEVANT_P (stmt_info))
5989 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
5992 /* FORNOW: SLP not supported. */
5993 if (STMT_SLP_TYPE (stmt_info))
5996 /* FORNOW: not yet supported. */
5997 if (STMT_VINFO_LIVE_P (stmt_info))
5999 if (vect_print_dump_info (REPORT_DETAILS))
6000 fprintf (vect_dump, "value used after loop.");
6004 /* Is vectorizable conditional operation? */
6005 if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
6008 op = GIMPLE_STMT_OPERAND (stmt, 1);
6010 if (TREE_CODE (op) != COND_EXPR)
6013 cond_expr = TREE_OPERAND (op, 0);
6014 then_clause = TREE_OPERAND (op, 1);
6015 else_clause = TREE_OPERAND (op, 2);
6017 if (!vect_is_simple_cond (cond_expr, loop_vinfo))
6020 /* We do not handle two different vector types for the condition
6022 if (TREE_TYPE (TREE_OPERAND (cond_expr, 0)) != TREE_TYPE (vectype))
6025 if (TREE_CODE (then_clause) == SSA_NAME)
6027 tree then_def_stmt = SSA_NAME_DEF_STMT (then_clause);
6028 if (!vect_is_simple_use (then_clause, loop_vinfo,
6029 &then_def_stmt, &def, &dt))
6032 else if (TREE_CODE (then_clause) != INTEGER_CST
6033 && TREE_CODE (then_clause) != REAL_CST
6034 && TREE_CODE (then_clause) != FIXED_CST)
6037 if (TREE_CODE (else_clause) == SSA_NAME)
6039 tree else_def_stmt = SSA_NAME_DEF_STMT (else_clause);
6040 if (!vect_is_simple_use (else_clause, loop_vinfo,
6041 &else_def_stmt, &def, &dt))
6044 else if (TREE_CODE (else_clause) != INTEGER_CST
6045 && TREE_CODE (else_clause) != REAL_CST
6046 && TREE_CODE (else_clause) != FIXED_CST)
6050 vec_mode = TYPE_MODE (vectype);
6054 STMT_VINFO_TYPE (stmt_info) = condition_vec_info_type;
6055 return expand_vec_cond_expr_p (op, vec_mode);
6061 scalar_dest = GIMPLE_STMT_OPERAND (stmt, 0);
6062 vec_dest = vect_create_destination_var (scalar_dest, vectype);
6064 /* Handle cond expr. */
6066 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 0), stmt, NULL);
6068 vect_get_vec_def_for_operand (TREE_OPERAND (cond_expr, 1), stmt, NULL);
6069 vec_then_clause = vect_get_vec_def_for_operand (then_clause, stmt, NULL);
6070 vec_else_clause = vect_get_vec_def_for_operand (else_clause, stmt, NULL);
6072 /* Arguments are ready. create the new vector stmt. */
6073 vec_compare = build2 (TREE_CODE (cond_expr), vectype,
6074 vec_cond_lhs, vec_cond_rhs);
6075 vec_cond_expr = build3 (VEC_COND_EXPR, vectype,
6076 vec_compare, vec_then_clause, vec_else_clause);
6078 *vec_stmt = build_gimple_modify_stmt (vec_dest, vec_cond_expr);
6079 new_temp = make_ssa_name (vec_dest, *vec_stmt);
6080 GIMPLE_STMT_OPERAND (*vec_stmt, 0) = new_temp;
6081 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
6087 /* Function vect_transform_stmt.
6089 Create a vectorized stmt to replace STMT, and insert it at BSI. */
6092 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi, bool *strided_store,
6095 bool is_store = false;
6096 tree vec_stmt = NULL_TREE;
6097 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
6098 tree orig_stmt_in_pattern;
6101 switch (STMT_VINFO_TYPE (stmt_info))
6103 case type_demotion_vec_info_type:
6104 gcc_assert (!slp_node);
6105 done = vectorizable_type_demotion (stmt, bsi, &vec_stmt);
6109 case type_promotion_vec_info_type:
6110 gcc_assert (!slp_node);
6111 done = vectorizable_type_promotion (stmt, bsi, &vec_stmt);
6115 case type_conversion_vec_info_type:
6116 done = vectorizable_conversion (stmt, bsi, &vec_stmt, slp_node);
6120 case induc_vec_info_type:
6121 gcc_assert (!slp_node);
6122 done = vectorizable_induction (stmt, bsi, &vec_stmt);
6126 case op_vec_info_type:
6127 done = vectorizable_operation (stmt, bsi, &vec_stmt, slp_node);
6131 case assignment_vec_info_type:
6132 done = vectorizable_assignment (stmt, bsi, &vec_stmt, slp_node);
6136 case load_vec_info_type:
6137 done = vectorizable_load (stmt, bsi, &vec_stmt, slp_node);
6141 case store_vec_info_type:
6142 done = vectorizable_store (stmt, bsi, &vec_stmt, slp_node);
6144 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6146 /* In case of interleaving, the whole chain is vectorized when the
6147 last store in the chain is reached. Store stmts before the last
6148 one are skipped, and there vec_stmt_info shouldn't be freed
6150 *strided_store = true;
6151 if (STMT_VINFO_VEC_STMT (stmt_info))
6158 case condition_vec_info_type:
6159 gcc_assert (!slp_node);
6160 done = vectorizable_condition (stmt, bsi, &vec_stmt);
6164 case call_vec_info_type:
6165 gcc_assert (!slp_node);
6166 done = vectorizable_call (stmt, bsi, &vec_stmt);
6169 case reduc_vec_info_type:
6170 gcc_assert (!slp_node);
6171 done = vectorizable_reduction (stmt, bsi, &vec_stmt);
6176 if (!STMT_VINFO_LIVE_P (stmt_info))
6178 if (vect_print_dump_info (REPORT_DETAILS))
6179 fprintf (vect_dump, "stmt not supported.");
6184 if (STMT_VINFO_LIVE_P (stmt_info)
6185 && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type)
6187 done = vectorizable_live_operation (stmt, bsi, &vec_stmt);
6193 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
6194 orig_stmt_in_pattern = STMT_VINFO_RELATED_STMT (stmt_info);
6195 if (orig_stmt_in_pattern)
6197 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt_in_pattern);
6198 /* STMT was inserted by the vectorizer to replace a computation idiom.
6199 ORIG_STMT_IN_PATTERN is a stmt in the original sequence that
6200 computed this idiom. We need to record a pointer to VEC_STMT in
6201 the stmt_info of ORIG_STMT_IN_PATTERN. See more details in the
6202 documentation of vect_pattern_recog. */
6203 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
6205 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
6206 STMT_VINFO_VEC_STMT (stmt_vinfo) = vec_stmt;
6215 /* This function builds ni_name = number of iterations loop executes
6216 on the loop preheader. */
6219 vect_build_loop_niters (loop_vec_info loop_vinfo)
6221 tree ni_name, stmt, var;
6223 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6224 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
6226 var = create_tmp_var (TREE_TYPE (ni), "niters");
6227 add_referenced_var (var);
6228 ni_name = force_gimple_operand (ni, &stmt, false, var);
6230 pe = loop_preheader_edge (loop);
6233 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6234 gcc_assert (!new_bb);
6241 /* This function generates the following statements:
6243 ni_name = number of iterations loop executes
6244 ratio = ni_name / vf
6245 ratio_mult_vf_name = ratio * vf
6247 and places them at the loop preheader edge. */
6250 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
6252 tree *ratio_mult_vf_name_ptr,
6253 tree *ratio_name_ptr)
6261 tree ratio_mult_vf_name;
6262 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6263 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
6264 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6267 pe = loop_preheader_edge (loop);
6269 /* Generate temporary variable that contains
6270 number of iterations loop executes. */
6272 ni_name = vect_build_loop_niters (loop_vinfo);
6273 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf));
6275 /* Create: ratio = ni >> log2(vf) */
6277 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf);
6278 if (!is_gimple_val (ratio_name))
6280 var = create_tmp_var (TREE_TYPE (ni), "bnd");
6281 add_referenced_var (var);
6283 ratio_name = force_gimple_operand (ratio_name, &stmt, true, var);
6284 pe = loop_preheader_edge (loop);
6285 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6286 gcc_assert (!new_bb);
6289 /* Create: ratio_mult_vf = ratio << log2 (vf). */
6291 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name),
6292 ratio_name, log_vf);
6293 if (!is_gimple_val (ratio_mult_vf_name))
6295 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
6296 add_referenced_var (var);
6298 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmt,
6300 pe = loop_preheader_edge (loop);
6301 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6302 gcc_assert (!new_bb);
6305 *ni_name_ptr = ni_name;
6306 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
6307 *ratio_name_ptr = ratio_name;
6313 /* Function vect_update_ivs_after_vectorizer.
6315 "Advance" the induction variables of LOOP to the value they should take
6316 after the execution of LOOP. This is currently necessary because the
6317 vectorizer does not handle induction variables that are used after the
6318 loop. Such a situation occurs when the last iterations of LOOP are
6320 1. We introduced new uses after LOOP for IVs that were not originally used
6321 after LOOP: the IVs of LOOP are now used by an epilog loop.
6322 2. LOOP is going to be vectorized; this means that it will iterate N/VF
6323 times, whereas the loop IVs should be bumped N times.
6326 - LOOP - a loop that is going to be vectorized. The last few iterations
6327 of LOOP were peeled.
6328 - NITERS - the number of iterations that LOOP executes (before it is
6329 vectorized). i.e, the number of times the ivs should be bumped.
6330 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
6331 coming out from LOOP on which there are uses of the LOOP ivs
6332 (this is the path from LOOP->exit to epilog_loop->preheader).
6334 The new definitions of the ivs are placed in LOOP->exit.
6335 The phi args associated with the edge UPDATE_E in the bb
6336 UPDATE_E->dest are updated accordingly.
6338 Assumption 1: Like the rest of the vectorizer, this function assumes
6339 a single loop exit that has a single predecessor.
6341 Assumption 2: The phi nodes in the LOOP header and in update_bb are
6342 organized in the same order.
6344 Assumption 3: The access function of the ivs is simple enough (see
6345 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
6347 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
6348 coming out of LOOP on which the ivs of LOOP are used (this is the path
6349 that leads to the epilog loop; other paths skip the epilog loop). This
6350 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
6351 needs to have its phis updated.
6355 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
6358 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6359 basic_block exit_bb = single_exit (loop)->dest;
6361 basic_block update_bb = update_e->dest;
6363 /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */
6365 /* Make sure there exists a single-predecessor exit bb: */
6366 gcc_assert (single_pred_p (exit_bb));
6368 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
6370 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
6372 tree access_fn = NULL;
6373 tree evolution_part;
6376 tree var, ni, ni_name;
6377 block_stmt_iterator last_bsi;
6379 if (vect_print_dump_info (REPORT_DETAILS))
6381 fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
6382 print_generic_expr (vect_dump, phi, TDF_SLIM);
6385 /* Skip virtual phi's. */
6386 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
6388 if (vect_print_dump_info (REPORT_DETAILS))
6389 fprintf (vect_dump, "virtual phi. skip.");
6393 /* Skip reduction phis. */
6394 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
6396 if (vect_print_dump_info (REPORT_DETAILS))
6397 fprintf (vect_dump, "reduc phi. skip.");
6401 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
6402 gcc_assert (access_fn);
6404 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
6405 gcc_assert (evolution_part != NULL_TREE);
6407 /* FORNOW: We do not support IVs whose evolution function is a polynomial
6408 of degree >= 2 or exponential. */
6409 gcc_assert (!tree_is_chrec (evolution_part));
6411 step_expr = evolution_part;
6412 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
6415 if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
6416 ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
6418 fold_convert (sizetype,
6419 fold_build2 (MULT_EXPR, TREE_TYPE (niters),
6420 niters, step_expr)));
6422 ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
6423 fold_build2 (MULT_EXPR, TREE_TYPE (init_expr),
6424 fold_convert (TREE_TYPE (init_expr),
6431 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
6432 add_referenced_var (var);
6434 last_bsi = bsi_last (exit_bb);
6435 ni_name = force_gimple_operand_bsi (&last_bsi, ni, false, var,
6436 true, BSI_SAME_STMT);
6438 /* Fix phi expressions in the successor bb. */
6439 SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
6444 /* Function vect_do_peeling_for_loop_bound
6446 Peel the last iterations of the loop represented by LOOP_VINFO.
6447 The peeled iterations form a new epilog loop. Given that the loop now
6448 iterates NITERS times, the new epilog loop iterates
6449 NITERS % VECTORIZATION_FACTOR times.
6451 The original loop will later be made to iterate
6452 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
6455 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio)
6457 tree ni_name, ratio_mult_vf_name;
6458 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6459 struct loop *new_loop;
6461 basic_block preheader;
6464 int min_scalar_loop_bound;
6465 int min_profitable_iters;
6467 if (vect_print_dump_info (REPORT_DETAILS))
6468 fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");
6470 initialize_original_copy_tables ();
6472 /* Generate the following variables on the preheader of original loop:
6474 ni_name = number of iteration the original loop executes
6475 ratio = ni_name / vf
6476 ratio_mult_vf_name = ratio * vf */
6477 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
6478 &ratio_mult_vf_name, ratio);
6480 loop_num = loop->num;
6482 /* Analyze cost to set threshhold for vectorized loop. */
6483 min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
6484 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
6485 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1);
6487 /* Use the cost model only if it is more conservative than user specified
6490 th = (unsigned) min_scalar_loop_bound;
6491 if (min_profitable_iters
6492 && (!min_scalar_loop_bound
6493 || min_profitable_iters > min_scalar_loop_bound))
6494 th = (unsigned) min_profitable_iters;
6496 if (((LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
6497 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
6498 && vect_print_dump_info (REPORT_DETAILS))
6499 fprintf (vect_dump, "vectorization may not be profitable.");
6501 new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
6502 ratio_mult_vf_name, ni_name, false,
6504 gcc_assert (new_loop);
6505 gcc_assert (loop_num == loop->num);
6506 #ifdef ENABLE_CHECKING
6507 slpeel_verify_cfg_after_peeling (loop, new_loop);
6510 /* A guard that controls whether the new_loop is to be executed or skipped
6511 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
6512 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
6513 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
6514 is on the path where the LOOP IVs are used and need to be updated. */
6516 preheader = loop_preheader_edge (new_loop)->src;
6517 if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
6518 update_e = EDGE_PRED (preheader, 0);
6520 update_e = EDGE_PRED (preheader, 1);
6522 /* Update IVs of original loop as if they were advanced
6523 by ratio_mult_vf_name steps. */
6524 vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
6526 /* After peeling we have to reset scalar evolution analyzer. */
6529 free_original_copy_tables ();
6533 /* Function vect_gen_niters_for_prolog_loop
6535 Set the number of iterations for the loop represented by LOOP_VINFO
6536 to the minimum between LOOP_NITERS (the original iteration count of the loop)
6537 and the misalignment of DR - the data reference recorded in
6538 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
6539 this loop, the data reference DR will refer to an aligned location.
6541 The following computation is generated:
6543 If the misalignment of DR is known at compile time:
6544 addr_mis = int mis = DR_MISALIGNMENT (dr);
6545 Else, compute address misalignment in bytes:
6546 addr_mis = addr & (vectype_size - 1)
6548 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
6550 (elem_size = element type size; an element is the scalar element
6551 whose type is the inner type of the vectype)
6555 prolog_niters = min ( LOOP_NITERS ,
6556 (VF/group_size - addr_mis/elem_size)&(VF/group_size-1) )
6557 where group_size is the size of the interleaved group.
6559 The above formulas assume that VF == number of elements in the vector. This
6560 may not hold when there are multiple-types in the loop.
6561 In this case, for some data-references in the loop the VF does not represent
6562 the number of elements that fit in the vector. Therefore, instead of VF we
6563 use TYPE_VECTOR_SUBPARTS. */
6566 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
6568 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
6569 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6571 tree iters, iters_name;
6574 tree dr_stmt = DR_STMT (dr);
6575 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
6576 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6577 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
6578 tree niters_type = TREE_TYPE (loop_niters);
6580 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
6581 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
6583 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
6585 /* For interleaved access element size must be multiplied by the size of
6586 the interleaved group. */
6587 group_size = DR_GROUP_SIZE (vinfo_for_stmt (
6588 DR_GROUP_FIRST_DR (stmt_info)));
6589 element_size *= group_size;
6592 pe = loop_preheader_edge (loop);
6594 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
6596 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
6597 int elem_misalign = byte_misalign / element_size;
6599 if (vect_print_dump_info (REPORT_DETAILS))
6600 fprintf (vect_dump, "known alignment = %d.", byte_misalign);
6601 iters = build_int_cst (niters_type,
6602 (nelements - elem_misalign)&(nelements/group_size-1));
6606 tree new_stmts = NULL_TREE;
6607 tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
6608 &new_stmts, NULL_TREE, loop);
6609 tree ptr_type = TREE_TYPE (start_addr);
6610 tree size = TYPE_SIZE (ptr_type);
6611 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
6612 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
6613 tree elem_size_log =
6614 build_int_cst (type, exact_log2 (vectype_align/nelements));
6615 tree nelements_minus_1 = build_int_cst (type, nelements - 1);
6616 tree nelements_tree = build_int_cst (type, nelements);
6620 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
6621 gcc_assert (!new_bb);
6623 /* Create: byte_misalign = addr & (vectype_size - 1) */
6625 fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);
6627 /* Create: elem_misalign = byte_misalign / element_size */
6629 fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);
6631 /* Create: (niters_type) (nelements - elem_misalign)&(nelements - 1) */
6632 iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
6633 iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
6634 iters = fold_convert (niters_type, iters);
6637 /* Create: prolog_loop_niters = min (iters, loop_niters) */
6638 /* If the loop bound is known at compile time we already verified that it is
6639 greater than vf; since the misalignment ('iters') is at most vf, there's
6640 no need to generate the MIN_EXPR in this case. */
6641 if (TREE_CODE (loop_niters) != INTEGER_CST)
6642 iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);
6644 if (vect_print_dump_info (REPORT_DETAILS))
6646 fprintf (vect_dump, "niters for prolog loop: ");
6647 print_generic_expr (vect_dump, iters, TDF_SLIM);
6650 var = create_tmp_var (niters_type, "prolog_loop_niters");
6651 add_referenced_var (var);
6652 iters_name = force_gimple_operand (iters, &stmt, false, var);
6654 /* Insert stmt on loop preheader edge. */
6657 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
6658 gcc_assert (!new_bb);
6665 /* Function vect_update_init_of_dr
6667 NITERS iterations were peeled from LOOP. DR represents a data reference
6668 in LOOP. This function updates the information recorded in DR to
6669 account for the fact that the first NITERS iterations had already been
6670 executed. Specifically, it updates the OFFSET field of DR. */
6673 vect_update_init_of_dr (struct data_reference *dr, tree niters)
6675 tree offset = DR_OFFSET (dr);
6677 niters = fold_build2 (MULT_EXPR, TREE_TYPE (niters), niters, DR_STEP (dr));
6678 offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters);
6679 DR_OFFSET (dr) = offset;
6683 /* Function vect_update_inits_of_drs
6685 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
6686 This function updates the information recorded for the data references in
6687 the loop to account for the fact that the first NITERS iterations had
6688 already been executed. Specifically, it updates the initial_condition of
6689 the access_function of all the data_references in the loop. */
6692 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
6695 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
6696 struct data_reference *dr;
6698 if (vect_print_dump_info (REPORT_DETAILS))
6699 fprintf (vect_dump, "=== vect_update_inits_of_dr ===");
6701 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
6702 vect_update_init_of_dr (dr, niters);
6706 /* Function vect_do_peeling_for_alignment
6708 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
6709 'niters' is set to the misalignment of one of the data references in the
6710 loop, thereby forcing it to refer to an aligned location at the beginning
6711 of the execution of this loop. The data reference for which we are
6712 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
6715 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
6717 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6718 tree niters_of_prolog_loop, ni_name;
6720 struct loop *new_loop;
6722 if (vect_print_dump_info (REPORT_DETAILS))
6723 fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");
6725 initialize_original_copy_tables ();
6727 ni_name = vect_build_loop_niters (loop_vinfo);
6728 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
6730 /* Peel the prolog loop and iterate it niters_of_prolog_loop. */
6732 slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
6733 niters_of_prolog_loop, ni_name, true, 0);
6734 gcc_assert (new_loop);
6735 #ifdef ENABLE_CHECKING
6736 slpeel_verify_cfg_after_peeling (new_loop, loop);
6739 /* Update number of times loop executes. */
6740 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
6741 LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
6742 TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
6744 /* Update the init conditions of the access functions of all data refs. */
6745 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
6747 /* After peeling we have to reset scalar evolution analyzer. */
6750 free_original_copy_tables ();
6754 /* Function vect_create_cond_for_align_checks.
6756 Create a conditional expression that represents the alignment checks for
6757 all of data references (array element references) whose alignment must be
6761 LOOP_VINFO - two fields of the loop information are used.
6762 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
6763 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
6766 COND_EXPR_STMT_LIST - statements needed to construct the conditional
6768 The returned value is the conditional expression to be used in the if
6769 statement that controls which version of the loop gets executed at runtime.
6771 The algorithm makes two assumptions:
6772 1) The number of bytes "n" in a vector is a power of 2.
6773 2) An address "a" is aligned if a%n is zero and that this
6774 test can be done as a&(n-1) == 0. For example, for 16
6775 byte vectors the test is a&0xf == 0. */
6778 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
6779 tree *cond_expr_stmt_list)
6781 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6782 VEC(tree,heap) *may_misalign_stmts
6783 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
6785 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
6789 tree int_ptrsize_type;
6791 tree or_tmp_name = NULL_TREE;
6792 tree and_tmp, and_tmp_name, and_stmt;
6795 /* Check that mask is one less than a power of 2, i.e., mask is
6796 all zeros followed by all ones. */
6797 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
6799 /* CHECKME: what is the best integer or unsigned type to use to hold a
6800 cast from a pointer value? */
6801 psize = TYPE_SIZE (ptr_type_node);
6803 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0);
6805 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address
6806 of the first vector of the i'th data reference. */
6808 for (i = 0; VEC_iterate (tree, may_misalign_stmts, i, ref_stmt); i++)
6810 tree new_stmt_list = NULL_TREE;
6812 tree addr_tmp, addr_tmp_name, addr_stmt;
6813 tree or_tmp, new_or_tmp_name, or_stmt;
6815 /* create: addr_tmp = (int)(address_of_first_vector) */
6816 addr_base = vect_create_addr_base_for_vector_ref (ref_stmt,
6817 &new_stmt_list, NULL_TREE, loop);
6819 if (new_stmt_list != NULL_TREE)
6820 append_to_statement_list_force (new_stmt_list, cond_expr_stmt_list);
6822 sprintf (tmp_name, "%s%d", "addr2int", i);
6823 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6824 add_referenced_var (addr_tmp);
6825 addr_tmp_name = make_ssa_name (addr_tmp, NULL_TREE);
6826 addr_stmt = fold_convert (int_ptrsize_type, addr_base);
6827 addr_stmt = build_gimple_modify_stmt (addr_tmp_name, addr_stmt);
6828 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt;
6829 append_to_statement_list_force (addr_stmt, cond_expr_stmt_list);
6831 /* The addresses are OR together. */
6833 if (or_tmp_name != NULL_TREE)
6835 /* create: or_tmp = or_tmp | addr_tmp */
6836 sprintf (tmp_name, "%s%d", "orptrs", i);
6837 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name);
6838 add_referenced_var (or_tmp);
6839 new_or_tmp_name = make_ssa_name (or_tmp, NULL_TREE);
6840 tmp = build2 (BIT_IOR_EXPR, int_ptrsize_type,
6841 or_tmp_name, addr_tmp_name);
6842 or_stmt = build_gimple_modify_stmt (new_or_tmp_name, tmp);
6843 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt;
6844 append_to_statement_list_force (or_stmt, cond_expr_stmt_list);
6845 or_tmp_name = new_or_tmp_name;
6848 or_tmp_name = addr_tmp_name;
6852 mask_cst = build_int_cst (int_ptrsize_type, mask);
6854 /* create: and_tmp = or_tmp & mask */
6855 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" );
6856 add_referenced_var (and_tmp);
6857 and_tmp_name = make_ssa_name (and_tmp, NULL_TREE);
6859 tmp = build2 (BIT_AND_EXPR, int_ptrsize_type, or_tmp_name, mask_cst);
6860 and_stmt = build_gimple_modify_stmt (and_tmp_name, tmp);
6861 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt;
6862 append_to_statement_list_force (and_stmt, cond_expr_stmt_list);
6864 /* Make and_tmp the left operand of the conditional test against zero.
6865 if and_tmp has a nonzero bit then some address is unaligned. */
6866 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
6867 return build2 (EQ_EXPR, boolean_type_node,
6868 and_tmp_name, ptrsize_zero);
6871 /* Function vect_vfa_segment_size.
6873 Create an expression that computes the size of segment
6874 that will be accessed for a data reference. The functions takes into
6875 account that realignment loads may access one more vector.
6878 DR: The data reference.
6879 VECT_FACTOR: vectorization factor.
6881 Return an expression whose value is the size of segment which will be
6885 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor)
6887 tree segment_length = fold_build2 (MULT_EXPR, integer_type_node,
6888 DR_STEP (dr), vect_factor);
6890 if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized)
6892 tree vector_size = TYPE_SIZE_UNIT
6893 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
6895 segment_length = fold_build2 (PLUS_EXPR, integer_type_node,
6896 segment_length, vector_size);
6898 return fold_convert (sizetype, segment_length);
6901 /* Function vect_create_cond_for_alias_checks.
6903 Create a conditional expression that represents the run-time checks for
6904 overlapping of address ranges represented by a list of data references
6905 relations passed as input.
6908 COND_EXPR - input conditional expression. New conditions will be chained
6909 with logical and operation.
6910 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
6914 COND_EXPR - conditional expression.
6915 COND_EXPR_STMT_LIST - statements needed to construct the conditional
6919 The returned value is the conditional expression to be used in the if
6920 statement that controls which version of the loop gets executed at runtime.
6924 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
6926 tree * cond_expr_stmt_list)
6928 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6929 VEC (ddr_p, heap) * may_alias_ddrs =
6930 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
6932 build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));
6936 tree part_cond_expr;
6938 /* Create expression
6939 ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
6940 || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
6944 ((store_ptr_n + store_segment_length_n) < load_ptr_n)
6945 || (load_ptr_n + load_segment_length_n) < store_ptr_n)) */
6947 if (VEC_empty (ddr_p, may_alias_ddrs))
6950 for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
6952 struct data_reference *dr_a, *dr_b;
6953 tree dr_group_first_a, dr_group_first_b;
6954 tree addr_base_a, addr_base_b;
6955 tree segment_length_a, segment_length_b;
6956 tree stmt_a, stmt_b;
6959 stmt_a = DR_STMT (DDR_A (ddr));
6960 dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
6961 if (dr_group_first_a)
6963 stmt_a = dr_group_first_a;
6964 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
6968 stmt_b = DR_STMT (DDR_B (ddr));
6969 dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
6970 if (dr_group_first_b)
6972 stmt_b = dr_group_first_b;
6973 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
6977 vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
6980 vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
6983 segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
6984 segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);
6986 if (vect_print_dump_info (REPORT_DR_DETAILS))
6989 "create runtime check for data references ");
6990 print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
6991 fprintf (vect_dump, " and ");
6992 print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
6997 fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
6998 fold_build2 (LT_EXPR, boolean_type_node,
6999 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
7003 fold_build2 (LT_EXPR, boolean_type_node,
7004 fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
7010 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
7011 *cond_expr, part_cond_expr);
7013 *cond_expr = part_cond_expr;
7015 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7016 fprintf (vect_dump, "created %u versioning for alias checks.\n",
7017 VEC_length (ddr_p, may_alias_ddrs));
7021 /* Function vect_loop_versioning.
7023 If the loop has data references that may or may not be aligned or/and
7024 has data reference relations whose independence was not proven then
7025 two versions of the loop need to be generated, one which is vectorized
7026 and one which isn't. A test is then generated to control which of the
7027 loops is executed. The test checks for the alignment of all of the
7028 data references that may or may not be aligned. An additional
7029 sequence of runtime tests is generated for each pairs of DDRs whose
7030 independence was not proven. The vectorized version of loop is
7031 executed only if both alias and alignment tests are passed. */
7034 vect_loop_versioning (loop_vec_info loop_vinfo)
7036 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7038 tree cond_expr = NULL_TREE;
7039 tree cond_expr_stmt_list = NULL_TREE;
7040 basic_block condition_bb;
7041 block_stmt_iterator cond_exp_bsi;
7042 basic_block merge_bb;
7043 basic_block new_exit_bb;
7045 tree orig_phi, new_phi, arg;
7046 unsigned prob = 4 * REG_BR_PROB_BASE / 5;
7047 tree gimplify_stmt_list;
7049 if (!VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
7050 && !VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7053 if (VEC_length (tree, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
7055 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr_stmt_list);
7057 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
7058 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr, &cond_expr_stmt_list);
7061 fold_build2 (NE_EXPR, boolean_type_node, cond_expr, integer_zero_node);
7063 force_gimple_operand (cond_expr, &gimplify_stmt_list, true,
7065 append_to_statement_list (gimplify_stmt_list, &cond_expr_stmt_list);
7067 initialize_original_copy_tables ();
7068 nloop = loop_version (loop, cond_expr, &condition_bb,
7069 prob, prob, REG_BR_PROB_BASE - prob, true);
7070 free_original_copy_tables();
7072 /* Loop versioning violates an assumption we try to maintain during
7073 vectorization - that the loop exit block has a single predecessor.
7074 After versioning, the exit block of both loop versions is the same
7075 basic block (i.e. it has two predecessors). Just in order to simplify
7076 following transformations in the vectorizer, we fix this situation
7077 here by adding a new (empty) block on the exit-edge of the loop,
7078 with the proper loop-exit phis to maintain loop-closed-form. */
7080 merge_bb = single_exit (loop)->dest;
7081 gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
7082 new_exit_bb = split_edge (single_exit (loop));
7083 new_exit_e = single_exit (loop);
7084 e = EDGE_SUCC (new_exit_bb, 0);
7086 for (orig_phi = phi_nodes (merge_bb); orig_phi;
7087 orig_phi = PHI_CHAIN (orig_phi))
7089 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
7091 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
7092 add_phi_arg (new_phi, arg, new_exit_e);
7093 SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
7096 /* End loop-exit-fixes after versioning. */
7098 update_ssa (TODO_update_ssa);
7099 if (cond_expr_stmt_list)
7101 cond_exp_bsi = bsi_last (condition_bb);
7102 bsi_insert_before (&cond_exp_bsi, cond_expr_stmt_list, BSI_SAME_STMT);
7106 /* Remove a group of stores (for SLP or interleaving), free their
7110 vect_remove_stores (tree first_stmt)
7113 tree next = first_stmt;
7115 stmt_vec_info next_stmt_info;
7116 block_stmt_iterator next_si;
7120 /* Free the attached stmt_vec_info and remove the stmt. */
7121 next_si = bsi_for_stmt (next);
7122 bsi_remove (&next_si, true);
7123 next_stmt_info = vinfo_for_stmt (next);
7124 ann = stmt_ann (next);
7125 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7126 free (next_stmt_info);
7127 set_stmt_info (ann, NULL);
7133 /* Vectorize SLP instance tree in postorder. */
7136 vect_schedule_slp_instance (slp_tree node, unsigned int vec_stmts_size)
7139 bool strided_store, is_store;
7140 block_stmt_iterator si;
7141 stmt_vec_info stmt_info;
7146 vect_schedule_slp_instance (SLP_TREE_LEFT (node), vec_stmts_size);
7147 vect_schedule_slp_instance (SLP_TREE_RIGHT (node), vec_stmts_size);
7149 stmt = VEC_index(tree, SLP_TREE_SCALAR_STMTS (node), 0);
7150 stmt_info = vinfo_for_stmt (stmt);
7151 SLP_TREE_VEC_STMTS (node) = VEC_alloc (tree, heap, vec_stmts_size);
7152 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
7154 if (vect_print_dump_info (REPORT_DETAILS))
7156 fprintf (vect_dump, "------>vectorizing SLP node starting from: ");
7157 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7160 si = bsi_for_stmt (stmt);
7161 is_store = vect_transform_stmt (stmt, &si, &strided_store, node);
7164 if (DR_GROUP_FIRST_DR (stmt_info))
7165 /* If IS_STORE is TRUE, the vectorization of the
7166 interleaving chain was completed - free all the stores in
7168 vect_remove_stores (DR_GROUP_FIRST_DR (stmt_info));
7170 /* FORNOW: SLP originates only from strided stores. */
7176 /* FORNOW: SLP originates only from strided stores. */
7182 vect_schedule_slp (loop_vec_info loop_vinfo, unsigned int nunits)
7184 VEC (slp_instance, heap) *slp_instances =
7185 LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
7186 slp_instance instance;
7187 unsigned int vec_stmts_size;
7188 unsigned int group_size, i;
7189 unsigned int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7190 bool is_store = false;
7192 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
7194 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
7195 /* For each SLP instance calculate number of vector stmts to be created
7196 for the scalar stmts in each node of the SLP tree. Number of vector
7197 elements in one vector iteration is the number of scalar elements in
7198 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
7200 vec_stmts_size = vectorization_factor * group_size / nunits;
7202 /* Schedule the tree of INSTANCE. */
7203 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
7206 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
7207 || vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
7208 fprintf (vect_dump, "vectorizing stmts using SLP.");
7214 /* Function vect_transform_loop.
7216 The analysis phase has determined that the loop is vectorizable.
7217 Vectorize the loop - created vectorized stmts to replace the scalar
7218 stmts in the loop, and update the loop exit condition. */
7221 vect_transform_loop (loop_vec_info loop_vinfo)
7223 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7224 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7225 int nbbs = loop->num_nodes;
7226 block_stmt_iterator si, next_si;
7229 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7231 bool slp_scheduled = false;
7232 unsigned int nunits;
7234 if (vect_print_dump_info (REPORT_DETAILS))
7235 fprintf (vect_dump, "=== vec_transform_loop ===");
7236 vect_loop_versioning (loop_vinfo);
7238 /* CHECKME: we wouldn't need this if we called update_ssa once
7240 bitmap_zero (vect_memsyms_to_rename);
7242 /* Peel the loop if there are data refs with unknown alignment.
7243 Only one data ref with unknown store is allowed. */
7245 if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo))
7246 vect_do_peeling_for_alignment (loop_vinfo);
7248 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
7249 compile time constant), or it is a constant that doesn't divide by the
7250 vectorization factor, then an epilog loop needs to be created.
7251 We therefore duplicate the loop: the original loop will be vectorized,
7252 and will compute the first (n/VF) iterations. The second copy of the loop
7253 will remain scalar and will compute the remaining (n%VF) iterations.
7254 (VF is the vectorization factor). */
7256 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7257 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7258 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
7259 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio);
7261 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
7262 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
7264 /* 1) Make sure the loop header has exactly two entries
7265 2) Make sure we have a preheader basic block. */
7267 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7269 split_edge (loop_preheader_edge (loop));
7271 /* FORNOW: the vectorizer supports only loops which body consist
7272 of one basic block (header + empty latch). When the vectorizer will
7273 support more involved loop forms, the order by which the BBs are
7274 traversed need to be reconsidered. */
7276 for (i = 0; i < nbbs; i++)
7278 basic_block bb = bbs[i];
7279 stmt_vec_info stmt_info;
7282 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
7284 if (vect_print_dump_info (REPORT_DETAILS))
7286 fprintf (vect_dump, "------>vectorizing phi: ");
7287 print_generic_expr (vect_dump, phi, TDF_SLIM);
7289 stmt_info = vinfo_for_stmt (phi);
7293 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7294 && !STMT_VINFO_LIVE_P (stmt_info))
7297 if ((TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info))
7298 != (unsigned HOST_WIDE_INT) vectorization_factor)
7299 && vect_print_dump_info (REPORT_DETAILS))
7300 fprintf (vect_dump, "multiple-types.");
7302 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def)
7304 if (vect_print_dump_info (REPORT_DETAILS))
7305 fprintf (vect_dump, "transform phi.");
7306 vect_transform_stmt (phi, NULL, NULL, NULL);
7310 for (si = bsi_start (bb); !bsi_end_p (si);)
7312 tree stmt = bsi_stmt (si);
7315 if (vect_print_dump_info (REPORT_DETAILS))
7317 fprintf (vect_dump, "------>vectorizing statement: ");
7318 print_generic_expr (vect_dump, stmt, TDF_SLIM);
7321 stmt_info = vinfo_for_stmt (stmt);
7323 /* vector stmts created in the outer-loop during vectorization of
7324 stmts in an inner-loop may not have a stmt_info, and do not
7325 need to be vectorized. */
7332 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7333 && !STMT_VINFO_LIVE_P (stmt_info))
7339 gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
7341 (unsigned int) TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7342 if (!STMT_SLP_TYPE (stmt_info)
7343 && nunits != (unsigned int) vectorization_factor
7344 && vect_print_dump_info (REPORT_DETAILS))
7345 /* For SLP VF is set according to unrolling factor, and not to
7346 vector size, hence for SLP this print is not valid. */
7347 fprintf (vect_dump, "multiple-types.");
7349 /* SLP. Schedule all the SLP instances when the first SLP stmt is
7351 if (STMT_SLP_TYPE (stmt_info))
7355 slp_scheduled = true;
7357 if (vect_print_dump_info (REPORT_DETAILS))
7358 fprintf (vect_dump, "=== scheduling SLP instances ===");
7360 is_store = vect_schedule_slp (loop_vinfo, nunits);
7362 /* IS_STORE is true if STMT is a store. Stores cannot be of
7363 hybrid SLP type. They are removed in
7364 vect_schedule_slp_instance and their vinfo is destroyed. */
7372 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
7373 if (PURE_SLP_STMT (stmt_info))
7380 /* -------- vectorize statement ------------ */
7381 if (vect_print_dump_info (REPORT_DETAILS))
7382 fprintf (vect_dump, "transform statement.");
7384 strided_store = false;
7385 is_store = vect_transform_stmt (stmt, &si, &strided_store, NULL);
7389 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
7391 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7392 interleaving chain was completed - free all the stores in
7394 tree next = DR_GROUP_FIRST_DR (stmt_info);
7396 stmt_vec_info next_stmt_info;
7400 next_si = bsi_for_stmt (next);
7401 next_stmt_info = vinfo_for_stmt (next);
7402 /* Free the attached stmt_vec_info and remove the stmt. */
7403 ann = stmt_ann (next);
7404 tmp = DR_GROUP_NEXT_DR (next_stmt_info);
7405 free (next_stmt_info);
7406 set_stmt_info (ann, NULL);
7407 bsi_remove (&next_si, true);
7410 bsi_remove (&si, true);
7415 /* Free the attached stmt_vec_info and remove the stmt. */
7416 ann = stmt_ann (stmt);
7418 set_stmt_info (ann, NULL);
7419 bsi_remove (&si, true);
7427 slpeel_make_loop_iterate_ntimes (loop, ratio);
7429 mark_set_for_renaming (vect_memsyms_to_rename);
7431 /* The memory tags and pointers in vectorized statements need to
7432 have their SSA forms updated. FIXME, why can't this be delayed
7433 until all the loops have been transformed? */
7434 update_ssa (TODO_update_ssa);
7436 if (vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7437 fprintf (vect_dump, "LOOP VECTORIZED.");
7438 if (loop->inner && vect_print_dump_info (REPORT_VECTORIZED_LOOPS))
7439 fprintf (vect_dump, "OUTER LOOP VECTORIZED.");