1 /* Transformation Utilities for Loop Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
29 #include "basic-block.h"
30 #include "diagnostic.h"
31 #include "tree-flow.h"
32 #include "tree-dump.h"
39 #include "tree-data-ref.h"
40 #include "tree-chrec.h"
41 #include "tree-scalar-evolution.h"
42 #include "tree-vectorizer.h"
43 #include "langhooks.h"
44 #include "tree-pass.h"
48 /* Utility functions for the code transformation. */
49 static bool vect_transform_stmt (gimple, gimple_stmt_iterator *, bool *,
51 static tree vect_create_destination_var (tree, tree);
52 static tree vect_create_data_ref_ptr
53 (gimple, struct loop*, tree, tree *, gimple *, bool, bool *);
54 static tree vect_create_addr_base_for_vector_ref
55 (gimple, gimple_seq *, tree, struct loop *);
56 static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
57 static tree vect_get_vec_def_for_operand (tree, gimple, tree *);
58 static tree vect_init_vector (gimple, tree, tree, gimple_stmt_iterator *);
59 static void vect_finish_stmt_generation
60 (gimple stmt, gimple vec_stmt, gimple_stmt_iterator *);
61 static bool vect_is_simple_cond (tree, loop_vec_info);
62 static void vect_create_epilog_for_reduction
63 (tree, gimple, int, enum tree_code, gimple);
64 static tree get_initial_def_for_reduction (gimple, tree, tree *);
66 /* Utility functions dealing with loop peeling (not peeling itself). */
67 static void vect_generate_tmps_on_preheader
68 (loop_vec_info, tree *, tree *, tree *);
69 static tree vect_build_loop_niters (loop_vec_info);
70 static void vect_update_ivs_after_vectorizer (loop_vec_info, tree, edge);
71 static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
72 static void vect_update_init_of_dr (struct data_reference *, tree niters);
73 static void vect_update_inits_of_drs (loop_vec_info, tree);
74 static int vect_min_worthwhile_factor (enum tree_code);
78 cost_for_stmt (gimple stmt)
80 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
82 switch (STMT_VINFO_TYPE (stmt_info))
84 case load_vec_info_type:
85 return TARG_SCALAR_LOAD_COST;
86 case store_vec_info_type:
87 return TARG_SCALAR_STORE_COST;
88 case op_vec_info_type:
89 case condition_vec_info_type:
90 case assignment_vec_info_type:
91 case reduc_vec_info_type:
92 case induc_vec_info_type:
93 case type_promotion_vec_info_type:
94 case type_demotion_vec_info_type:
95 case type_conversion_vec_info_type:
96 case call_vec_info_type:
97 return TARG_SCALAR_STMT_COST;
98 case undef_vec_info_type:
105 /* Function vect_estimate_min_profitable_iters
107 Return the number of iterations required for the vector version of the
108 loop to be profitable relative to the cost of the scalar version of the loop.
111 TODO: Take profile info into account before making vectorization
112 decisions, if available. */
115 vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo)
118 int min_profitable_iters;
119 int peel_iters_prologue;
120 int peel_iters_epilogue;
121 int vec_inside_cost = 0;
122 int vec_outside_cost = 0;
123 int scalar_single_iter_cost = 0;
124 int scalar_outside_cost = 0;
125 bool runtime_test = false;
126 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
127 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
128 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
129 int nbbs = loop->num_nodes;
130 int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
131 int peel_guard_costs = 0;
132 int innerloop_iters = 0, factor;
133 VEC (slp_instance, heap) *slp_instances;
134 slp_instance instance;
136 /* Cost model disabled. */
137 if (!flag_vect_cost_model)
139 if (vect_print_dump_info (REPORT_COST))
140 fprintf (vect_dump, "cost model disabled.");
144 /* If the number of iterations is unknown, or the
145 peeling-for-misalignment amount is unknown, we will have to generate
146 a runtime test to test the loop count against the threshold. */
147 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
148 || (byte_misalign < 0))
151 /* Requires loop versioning tests to handle misalignment. */
153 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)))
155 /* FIXME: Make cost depend on complexity of individual check. */
157 VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
158 if (vect_print_dump_info (REPORT_COST))
159 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
160 "versioning to treat misalignment.\n");
163 if (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
165 /* FIXME: Make cost depend on complexity of individual check. */
167 VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
168 if (vect_print_dump_info (REPORT_COST))
169 fprintf (vect_dump, "cost model: Adding cost of checks for loop "
170 "versioning aliasing.\n");
173 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
174 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
176 vec_outside_cost += TARG_COND_TAKEN_BRANCH_COST;
179 /* Count statements in scalar loop. Using this as scalar cost for a single iteration for now.
182 TODO: Add outer loop support.
184 TODO: Consider assigning different costs to different scalar statements. */
189 innerloop_iters = 50; /* FIXME */
191 for (i = 0; i < nbbs; i++)
193 gimple_stmt_iterator si;
194 basic_block bb = bbs[i];
196 if (bb->loop_father == loop->inner)
197 factor = innerloop_iters;
201 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
203 gimple stmt = gsi_stmt (si);
204 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
205 /* Skip stmts that are not vectorized inside the loop. */
206 if (!STMT_VINFO_RELEVANT_P (stmt_info)
207 && (!STMT_VINFO_LIVE_P (stmt_info)
208 || STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def))
210 scalar_single_iter_cost += cost_for_stmt (stmt) * factor;
211 vec_inside_cost += STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) * factor;
212 /* FIXME: for stmts in the inner-loop in outer-loop vectorization,
213 some of the "outside" costs are generated inside the outer-loop. */
214 vec_outside_cost += STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info);
218 /* Add additional cost for the peeled instructions in prologue and epilogue loop.
221 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
222 at compile-time - we assume it's vf/2 (the worst would be vf-1).
224 TODO: Build an expression that represents peel_iters for prologue and
225 epilogue to be used in a run-time test. */
227 if (byte_misalign < 0)
229 peel_iters_prologue = vf/2;
230 if (vect_print_dump_info (REPORT_COST))
231 fprintf (vect_dump, "cost model: "
232 "prologue peel iters set to vf/2.");
234 /* If peeling for alignment is unknown, the loop bound of the main loop becomes unknown. */
236 peel_iters_epilogue = vf/2;
237 if (vect_print_dump_info (REPORT_COST))
238 fprintf (vect_dump, "cost model: "
239 "epilogue peel iters set to vf/2 because "
240 "peeling for alignment is unknown .");
242 /* If peeled iterations are unknown, count a taken branch and a not taken
243 branch per peeled loop. Even if scalar loop iterations are known,
244 vector iterations are not known since peeled prologue iterations are
245 not known. Hence guards remain the same. */
246 peel_guard_costs += 2 * (TARG_COND_TAKEN_BRANCH_COST
247 + TARG_COND_NOT_TAKEN_BRANCH_COST);
254 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
255 int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
256 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
257 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
259 peel_iters_prologue = nelements - (byte_misalign / element_size);
262 peel_iters_prologue = 0;
264 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
266 peel_iters_epilogue = vf/2;
267 if (vect_print_dump_info (REPORT_COST))
268 fprintf (vect_dump, "cost model: "
269 "epilogue peel iters set to vf/2 because "
270 "loop iterations are unknown .");
272 /* If peeled iterations are known but the number of scalar loop
273 iterations is unknown, count a taken branch per peeled loop. */
274 peel_guard_costs += 2 * TARG_COND_TAKEN_BRANCH_COST;
279 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
280 peel_iters_prologue = niters < peel_iters_prologue ?
281 niters : peel_iters_prologue;
282 peel_iters_epilogue = (niters - peel_iters_prologue) % vf;
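/* For illustration (hypothetical numbers): with niters = 100, vf = 4 and
   peel_iters_prologue = 2, the epilogue gets (100 - 2) % 4 = 2 peeled
   iterations and the remaining 96 iterations run in the vector loop.  */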
286 vec_outside_cost += (peel_iters_prologue * scalar_single_iter_cost)
287 + (peel_iters_epilogue * scalar_single_iter_cost)
290 /* FORNOW: The scalar outside cost is incremented in one of the following ways:
293 1. The vectorizer checks for alignment and aliasing and generates
294 a condition that allows dynamic vectorization. A cost model
295 check is ANDed with the versioning condition. Hence the scalar code
296 path now has the added cost of the versioning check.
298 if (cost > th & versioning_check)
301 Hence the run-time scalar cost is incremented by a not-taken branch cost.
303 2. The vectorizer then checks if a prologue is required. If the
304 cost model check was not done before during versioning, it has to
305 be done before the prologue check.
308 prologue = scalar_iters
313 if (prologue == num_iters)
316 Hence the run-time scalar cost is incremented by a taken branch,
317 plus a not-taken branch, plus a taken branch cost.
319 3. The vectorizer then checks if an epilogue is required. If the
320 cost model check was not done before during prologue check, it
321 has to be done with the epilogue check.
327 if (prologue == num_iters)
330 if ((cost <= th) | (scalar_iters-prologue-epilogue == 0))
333 Hence the run-time scalar cost should be incremented by 2 taken branches.
336 TODO: The back end may reorder the BBs differently and reverse
337 conditions/branch directions. Change the estimates below to
338 something more reasonable. */
342 /* Cost model check occurs at versioning. */
343 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
344 || VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)))
345 scalar_outside_cost += TARG_COND_NOT_TAKEN_BRANCH_COST;
348 /* Cost model occurs at prologue generation. */
349 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
350 scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST
351 + TARG_COND_NOT_TAKEN_BRANCH_COST;
352 /* Cost model check occurs at epilogue generation. */
354 scalar_outside_cost += 2 * TARG_COND_TAKEN_BRANCH_COST;
359 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
360 for (i = 0; VEC_iterate (slp_instance, slp_instances, i, instance); i++)
362 vec_outside_cost += SLP_INSTANCE_OUTSIDE_OF_LOOP_COST (instance);
363 vec_inside_cost += SLP_INSTANCE_INSIDE_OF_LOOP_COST (instance);
366 /* Calculate number of iterations required to make the vector version
367 profitable, relative to the loop bodies only. The following condition must hold true:
369 SIC * niters + SOC > VIC * ((niters-PL_ITERS-EP_ITERS)/VF) + VOC
371 where SIC = scalar iteration cost, VIC = vector iteration cost,
372 VOC = vector outside cost, VF = vectorization factor,
373 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations
374 SOC = scalar outside cost for run time cost model check. */
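/* A worked instance of the condition above, with made-up costs rather than
   real target numbers: take SIC = 4, VIC = 6, VOC = 10, SOC = 0, VF = 4 and
   no peeling (PL_ITERS = EP_ITERS = 0).  Solving for niters gives
   niters > ((VOC - SOC) * VF) / (SIC * VF - VIC) = 40 / 10 = 4,
   so the vector loop starts to pay off at 5 iterations, which is what the
   division and the correction step below compute.  */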
376 if ((scalar_single_iter_cost * vf) > vec_inside_cost)
378 if (vec_outside_cost <= 0)
379 min_profitable_iters = 1;
382 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf
383 - vec_inside_cost * peel_iters_prologue
384 - vec_inside_cost * peel_iters_epilogue)
385 / ((scalar_single_iter_cost * vf)
388 if ((scalar_single_iter_cost * vf * min_profitable_iters)
389 <= ((vec_inside_cost * min_profitable_iters)
390 + ((vec_outside_cost - scalar_outside_cost) * vf)))
391 min_profitable_iters++;
394 /* The vector version will never be profitable. */
397 if (vect_print_dump_info (REPORT_COST))
398 fprintf (vect_dump, "cost model: vector iteration cost = %d "
399 "is greater than or equal to the scalar iteration cost = %d "
400 "times the vectorization factor = %d; vectorization is not profitable.",
401 vec_inside_cost, scalar_single_iter_cost, vf);
405 if (vect_print_dump_info (REPORT_COST))
407 fprintf (vect_dump, "Cost model analysis: \n");
408 fprintf (vect_dump, " Vector inside of loop cost: %d\n",
410 fprintf (vect_dump, " Vector outside of loop cost: %d\n",
412 fprintf (vect_dump, " Scalar iteration cost: %d\n",
413 scalar_single_iter_cost);
414 fprintf (vect_dump, " Scalar outside cost: %d\n", scalar_outside_cost);
415 fprintf (vect_dump, " prologue iterations: %d\n",
416 peel_iters_prologue);
417 fprintf (vect_dump, " epilogue iterations: %d\n",
418 peel_iters_epilogue);
419 fprintf (vect_dump, " Calculated minimum iters for profitability: %d\n",
420 min_profitable_iters);
423 min_profitable_iters =
424 min_profitable_iters < vf ? vf : min_profitable_iters;
426 /* Because the condition we create is:
427 if (niters <= min_profitable_iters)
428 then skip the vectorized loop. */
429 min_profitable_iters--;
431 if (vect_print_dump_info (REPORT_COST))
432 fprintf (vect_dump, " Profitability threshold = %d\n",
433 min_profitable_iters);
435 return min_profitable_iters;
439 /* TODO: Close dependency between vect_model_*_cost and vectorizable_*
440 functions. Design better to avoid maintenance issues. */
442 /* Function vect_model_reduction_cost.
444 Models cost for a reduction operation, including the vector ops
445 generated within the strip-mine loop, the initial definition before
446 the loop, and the epilogue code that must be generated. */
449 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
456 gimple stmt, orig_stmt;
458 enum machine_mode mode;
459 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
460 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
463 /* Cost of reduction op inside loop. */
464 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) += ncopies * TARG_VEC_STMT_COST;
466 stmt = STMT_VINFO_STMT (stmt_info);
468 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
470 case GIMPLE_SINGLE_RHS:
471 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
472 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
474 case GIMPLE_UNARY_RHS:
475 reduction_op = gimple_assign_rhs1 (stmt);
477 case GIMPLE_BINARY_RHS:
478 reduction_op = gimple_assign_rhs2 (stmt);
484 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
487 if (vect_print_dump_info (REPORT_COST))
489 fprintf (vect_dump, "unsupported data-type ");
490 print_generic_expr (vect_dump, TREE_TYPE (reduction_op), TDF_SLIM);
495 mode = TYPE_MODE (vectype);
496 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
499 orig_stmt = STMT_VINFO_STMT (stmt_info);
501 code = gimple_assign_rhs_code (orig_stmt);
503 /* Add in cost for initial definition. */
504 outer_cost += TARG_SCALAR_TO_VEC_COST;
506 /* Determine cost of epilogue code.
508 We have a reduction operator that will reduce the vector in one statement.
509 Also requires scalar extract. */
511 if (!nested_in_vect_loop_p (loop, orig_stmt))
513 if (reduc_code < NUM_TREE_CODES)
514 outer_cost += TARG_VEC_STMT_COST + TARG_VEC_TO_SCALAR_COST;
517 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
519 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt)));
520 int element_bitsize = tree_low_cst (bitsize, 1);
521 int nelements = vec_size_in_bits / element_bitsize;
523 optab = optab_for_tree_code (code, vectype, optab_default);
525 /* We have a whole vector shift available. */
526 if (VECTOR_MODE_P (mode)
527 && optab_handler (optab, mode)->insn_code != CODE_FOR_nothing
528 && optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
529 /* Final reduction via vector shifts and the reduction operator. Also
530 requires scalar extract. */
531 outer_cost += ((exact_log2(nelements) * 2) * TARG_VEC_STMT_COST
532 + TARG_VEC_TO_SCALAR_COST);
534 /* Use extracts and reduction op for final reduction. For N elements,
535 we have N extracts and N-1 reduction ops. */
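/* For example (illustrative numbers only): with nelements = 4, the
   shift-based epilogue above costs 2 * log2 (4) = 4 vector stmts plus one
   extract, while this fallback costs 4 + 3 = 7 vector stmts.  */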
536 outer_cost += ((nelements + nelements - 1) * TARG_VEC_STMT_COST);
540 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = outer_cost;
542 if (vect_print_dump_info (REPORT_COST))
543 fprintf (vect_dump, "vect_model_reduction_cost: inside_cost = %d, "
544 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
545 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
551 /* Function vect_model_induction_cost.
553 Models cost for induction operations. */
556 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies)
558 /* loop cost for vec_loop. */
559 STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info) = ncopies * TARG_VEC_STMT_COST;
560 /* prologue cost for vec_init and vec_step. */
561 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info) = 2 * TARG_SCALAR_TO_VEC_COST;
563 if (vect_print_dump_info (REPORT_COST))
564 fprintf (vect_dump, "vect_model_induction_cost: inside_cost = %d, "
565 "outside_cost = %d .", STMT_VINFO_INSIDE_OF_LOOP_COST (stmt_info),
566 STMT_VINFO_OUTSIDE_OF_LOOP_COST (stmt_info));
570 /* Function vect_model_simple_cost.
572 Models cost for simple operations, i.e. those that only emit ncopies of a
573 single op. Right now, this does not account for multiple insns that could
574 be generated for the single vector op. We will handle that shortly. */
577 vect_model_simple_cost (stmt_vec_info stmt_info, int ncopies,
578 enum vect_def_type *dt, slp_tree slp_node)
581 int inside_cost = 0, outside_cost = 0;
583 /* The SLP costs were already calculated during SLP tree build. */
584 if (PURE_SLP_STMT (stmt_info))
587 inside_cost = ncopies * TARG_VEC_STMT_COST;
589 /* FORNOW: Assuming maximum 2 args per stmt. */
590 for (i = 0; i < 2; i++)
592 if (dt[i] == vect_constant_def || dt[i] == vect_invariant_def)
593 outside_cost += TARG_SCALAR_TO_VEC_COST;
596 if (vect_print_dump_info (REPORT_COST))
597 fprintf (vect_dump, "vect_model_simple_cost: inside_cost = %d, "
598 "outside_cost = %d .", inside_cost, outside_cost);
600 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
601 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
602 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
606 /* Function vect_cost_strided_group_size
608 For strided load or store, return the group_size only if it is the first
609 load or store of a group, else return 1. This ensures that group size is
610 only returned once per group. */
613 vect_cost_strided_group_size (stmt_vec_info stmt_info)
615 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
617 if (first_stmt == STMT_VINFO_STMT (stmt_info))
618 return DR_GROUP_SIZE (stmt_info);
624 /* Function vect_model_store_cost
626 Models cost for stores. In the case of strided accesses, one access
627 has the overhead of the strided access attributed to it. */
630 vect_model_store_cost (stmt_vec_info stmt_info, int ncopies,
631 enum vect_def_type dt, slp_tree slp_node)
634 int inside_cost = 0, outside_cost = 0;
636 /* The SLP costs were already calculated during SLP tree build. */
637 if (PURE_SLP_STMT (stmt_info))
640 if (dt == vect_constant_def || dt == vect_invariant_def)
641 outside_cost = TARG_SCALAR_TO_VEC_COST;
643 /* Strided access? */
644 if (DR_GROUP_FIRST_DR (stmt_info) && !slp_node)
645 group_size = vect_cost_strided_group_size (stmt_info);
646 /* Not a strided access. */
650 /* Is this an access in a group of stores, which provide strided access?
651 If so, add in the cost of the permutes. */
654 /* Uses a high and low interleave operation for each needed permute. */
655 inside_cost = ncopies * exact_log2(group_size) * group_size
656 * TARG_VEC_STMT_COST;
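/* E.g. (illustrative): for group_size = 4 this charges log2 (4) * 4 = 8
   interleave operations per copy; for group_size = 2 it charges 2.  */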
658 if (vect_print_dump_info (REPORT_COST))
659 fprintf (vect_dump, "vect_model_store_cost: strided group_size = %d .",
664 /* Costs of the stores. */
665 inside_cost += ncopies * TARG_VEC_STORE_COST;
667 if (vect_print_dump_info (REPORT_COST))
668 fprintf (vect_dump, "vect_model_store_cost: inside_cost = %d, "
669 "outside_cost = %d .", inside_cost, outside_cost);
671 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
672 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
673 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
677 /* Function vect_model_load_cost
679 Models cost for loads. In the case of strided accesses, the last access
680 has the overhead of the strided access attributed to it. Since unaligned
681 accesses are supported for loads, we also account for the costs of the
682 access scheme chosen. */
685 vect_model_load_cost (stmt_vec_info stmt_info, int ncopies, slp_tree slp_node)
689 int alignment_support_scheme;
691 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info), *first_dr;
692 int inside_cost = 0, outside_cost = 0;
694 /* The SLP costs were already calculated during SLP tree build. */
695 if (PURE_SLP_STMT (stmt_info))
698 /* Strided accesses? */
699 first_stmt = DR_GROUP_FIRST_DR (stmt_info);
700 if (first_stmt && !slp_node)
702 group_size = vect_cost_strided_group_size (stmt_info);
703 first_dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt));
705 /* Not a strided access. */
712 alignment_support_scheme = vect_supportable_dr_alignment (first_dr);
714 /* Is this an access in a group of loads providing strided access?
715 If so, add in the cost of the permutes. */
718 /* Uses even and odd extract operations for each needed permute. */
719 inside_cost = ncopies * exact_log2(group_size) * group_size
720 * TARG_VEC_STMT_COST;
722 if (vect_print_dump_info (REPORT_COST))
723 fprintf (vect_dump, "vect_model_load_cost: strided group_size = %d .",
728 /* The loads themselves. */
729 switch (alignment_support_scheme)
733 inside_cost += ncopies * TARG_VEC_LOAD_COST;
735 if (vect_print_dump_info (REPORT_COST))
736 fprintf (vect_dump, "vect_model_load_cost: aligned.");
740 case dr_unaligned_supported:
742 /* Here, we assign an additional cost for the unaligned load. */
743 inside_cost += ncopies * TARG_VEC_UNALIGNED_LOAD_COST;
745 if (vect_print_dump_info (REPORT_COST))
746 fprintf (vect_dump, "vect_model_load_cost: unaligned supported by hardware.");
751 case dr_explicit_realign:
753 inside_cost += ncopies * (2*TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
755 /* FIXME: If the misalignment remains fixed across the iterations of
756 the containing loop, the following cost should be added to the outside costs. */
758 if (targetm.vectorize.builtin_mask_for_load)
759 inside_cost += TARG_VEC_STMT_COST;
763 case dr_explicit_realign_optimized:
765 if (vect_print_dump_info (REPORT_COST))
766 fprintf (vect_dump, "vect_model_load_cost: unaligned software pipelined.");
769 /* Unaligned software pipeline has a load of an address, an initial
770 load, and possibly a mask operation to "prime" the loop. However,
771 if this is an access in a group of loads, which provide strided
772 access, then the above cost should only be considered for one
773 access in the group. Inside the loop, there is a load op
774 and a realignment op. */
776 if ((!DR_GROUP_FIRST_DR (stmt_info)) || group_size > 1 || slp_node)
778 outside_cost = 2*TARG_VEC_STMT_COST;
779 if (targetm.vectorize.builtin_mask_for_load)
780 outside_cost += TARG_VEC_STMT_COST;
783 inside_cost += ncopies * (TARG_VEC_LOAD_COST + TARG_VEC_STMT_COST);
792 if (vect_print_dump_info (REPORT_COST))
793 fprintf (vect_dump, "vect_model_load_cost: inside_cost = %d, "
794 "outside_cost = %d .", inside_cost, outside_cost);
796 /* Set the costs either in STMT_INFO or SLP_NODE (if exists). */
797 stmt_vinfo_set_inside_of_loop_cost (stmt_info, slp_node, inside_cost);
798 stmt_vinfo_set_outside_of_loop_cost (stmt_info, slp_node, outside_cost);
802 /* Function vect_get_new_vect_var.
804 Returns a name for a new variable. The current naming scheme appends the
805 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
806 the name of vectorizer generated variables, and appends that to NAME if provided. */
810 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
817 case vect_simple_var:
820 case vect_scalar_var:
823 case vect_pointer_var:
832 char* tmp = concat (prefix, name, NULL);
833 new_vect_var = create_tmp_var (type, tmp);
837 new_vect_var = create_tmp_var (type, prefix);
839 /* Mark vector typed variable as a gimple register variable. */
840 if (TREE_CODE (type) == VECTOR_TYPE)
841 DECL_GIMPLE_REG_P (new_vect_var) = true;
847 /* Function vect_create_addr_base_for_vector_ref.
849 Create an expression that computes the address of the first memory location
850 that will be accessed for a data reference.
853 STMT: The statement containing the data reference.
854 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
855 OFFSET: Optional. If supplied, it is added to the initial address.
856 LOOP: Specify relative to which loop-nest should the address be computed.
857 For example, when the dataref is in an inner-loop nested in an
858 outer-loop that is now being vectorized, LOOP can be either the
859 outer-loop, or the inner-loop. The first memory location accessed
860 by the following dataref ('in' points to short):
867 if LOOP=i_loop: &in (relative to i_loop)
868 if LOOP=j_loop: &in+i*2B (relative to j_loop)
871 1. Return an SSA_NAME whose value is the address of the memory location of
872 the first vector of the data reference.
873 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
874 these statement(s) which define the returned SSA_NAME.
876 FORNOW: We are only handling array accesses with step 1. */
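/* Roughly, for a dataref like a[init + i] the sequence built below looks
   like the following ("batmp" and "base_off" are simply the temporaries
   created by this function):

     batmp = &a;
     base_off = (sizetype) (DR_OFFSET + DR_INIT) [ + OFFSET * step ];
     addr = (vectype *) (batmp + base_off);

   and the SSA_NAME holding ADDR is what gets returned.  */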
879 vect_create_addr_base_for_vector_ref (gimple stmt,
880 gimple_seq *new_stmt_list,
884 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
885 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
886 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
887 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
889 tree data_ref_base_var;
891 tree addr_base, addr_expr;
893 gimple_seq seq = NULL;
894 tree base_offset = unshare_expr (DR_OFFSET (dr));
895 tree init = unshare_expr (DR_INIT (dr));
896 tree vect_ptr_type, addr_expr2;
897 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
900 if (loop != containing_loop)
902 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
903 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
905 gcc_assert (nested_in_vect_loop_p (loop, stmt));
907 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
908 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
909 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
912 /* Create data_ref_base */
913 base_name = build_fold_indirect_ref (data_ref_base);
914 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
915 add_referenced_var (data_ref_base_var);
916 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
918 gimple_seq_add_seq (new_stmt_list, seq);
920 /* Create base_offset */
921 base_offset = size_binop (PLUS_EXPR, base_offset, init);
922 base_offset = fold_convert (sizetype, base_offset);
923 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
924 add_referenced_var (dest);
925 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
926 gimple_seq_add_seq (new_stmt_list, seq);
930 tree tmp = create_tmp_var (sizetype, "offset");
932 add_referenced_var (tmp);
933 offset = fold_build2 (MULT_EXPR, TREE_TYPE (offset), offset, step);
934 base_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (base_offset),
935 base_offset, offset);
936 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
937 gimple_seq_add_seq (new_stmt_list, seq);
940 /* base + base_offset */
941 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
942 data_ref_base, base_offset);
944 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
946 /* addr_expr = addr_base */
947 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
948 get_name (base_name));
949 add_referenced_var (addr_expr);
950 vec_stmt = fold_convert (vect_ptr_type, addr_base);
951 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
952 get_name (base_name));
953 add_referenced_var (addr_expr2);
954 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr2);
955 gimple_seq_add_seq (new_stmt_list, seq);
957 if (vect_print_dump_info (REPORT_DETAILS))
959 fprintf (vect_dump, "created ");
960 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
966 /* Function vect_create_data_ref_ptr.
968 Create a new pointer to vector type (vp), that points to the first location
969 accessed in the loop by STMT, along with the def-use update chain to
970 appropriately advance the pointer through the loop iterations. Also set
971 aliasing information for the pointer. This vector pointer is used by the
972 callers to this function to create a memory reference expression for vector load/store access.
976 1. STMT: a stmt that references memory. Expected to be of the form
977 GIMPLE_ASSIGN <name, data-ref> or
978 GIMPLE_ASSIGN <data-ref, name>.
979 2. AT_LOOP: the loop where the vector memref is to be created.
980 3. OFFSET (optional): an offset to be added to the initial address accessed
981 by the data-ref in STMT.
982 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
983 pointing to the initial address.
986 1. Declare a new ptr to vector_type, and have it point to the base of the
987 data reference (initial address accessed by the data reference).
988 For example, for vector of type V8HI, the following code is generated:
991 vp = (v8hi *)initial_address;
993 if OFFSET is not supplied:
994 initial_address = &a[init];
995 if OFFSET is supplied:
996 initial_address = &a[init + OFFSET];
998 Return the initial_address in INITIAL_ADDRESS.
1000 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
1001 update the pointer in each iteration of the loop.
1003 Return the increment stmt that updates the pointer in PTR_INCR.
1005 3. Set INV_P to true if the access pattern of the data reference in the
1006 vectorized loop is invariant. Set it to false otherwise.
1008 4. Return the pointer. */
1011 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
1012 tree offset, tree *initial_address, gimple *ptr_incr,
1013 bool only_init, bool *inv_p)
1016 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1017 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1018 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1019 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
1020 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
1021 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1027 gimple_seq new_stmt_list = NULL;
1031 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1033 gimple_stmt_iterator incr_gsi;
1035 tree indx_before_incr, indx_after_incr;
1039 /* Check the step (evolution) of the load in LOOP, and record
1040 whether it's invariant. */
1041 if (nested_in_vect_loop)
1042 step = STMT_VINFO_DR_STEP (stmt_info);
1044 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
1046 if (tree_int_cst_compare (step, size_zero_node) == 0)
1051 /* Create an expression for the first address accessed by this load in LOOP. */
1053 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
1055 if (vect_print_dump_info (REPORT_DETAILS))
1057 tree data_ref_base = base_name;
1058 fprintf (vect_dump, "create vector-pointer variable to type: ");
1059 print_generic_expr (vect_dump, vectype, TDF_SLIM);
1060 if (TREE_CODE (data_ref_base) == VAR_DECL)
1061 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
1062 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
1063 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
1064 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
1065 fprintf (vect_dump, " vectorizing a record based array ref: ");
1066 else if (TREE_CODE (data_ref_base) == SSA_NAME)
1067 fprintf (vect_dump, " vectorizing a pointer ref: ");
1068 print_generic_expr (vect_dump, base_name, TDF_SLIM);
1071 /** (1) Create the new vector-pointer variable: **/
1072 vect_ptr_type = build_pointer_type (vectype);
1074 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
1075 get_name (base_name));
1076 add_referenced_var (vect_ptr);
1078 /** (2) Add aliasing information to the new vector-pointer:
1079 (The points-to info (DR_PTR_INFO) may be defined later.) **/
1081 tag = DR_SYMBOL_TAG (dr);
1084 /* If tag is a variable (and NOT_A_TAG) then a new symbol memory
1085 tag must be created with tag added to its may alias list. */
1087 new_type_alias (vect_ptr, tag, DR_REF (dr));
1089 set_symbol_mem_tag (vect_ptr, tag);
1091 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
1092 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
1093 def-use update cycles for the pointer: One relative to the outer-loop
1094 (LOOP), which is what steps (3) and (4) below do. The other is relative
1095 to the inner-loop (which is the inner-most loop containing the dataref),
1096 and this is done by step (5) below.
1098 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
1099 inner-most loop, and so steps (3),(4) work the same, and step (5) is
1100 redundant. Steps (3),(4) create the following:
1103 LOOP: vp1 = phi(vp0,vp2)
1109 If there is an inner-loop nested in loop, then step (5) will also be
1110 applied, and an additional update in the inner-loop will be created:
1113 LOOP: vp1 = phi(vp0,vp2)
1115 inner: vp3 = phi(vp1,vp4)
1116 vp4 = vp3 + inner_step
1122 /** (3) Calculate the initial address of the vector-pointer, and set
1123 the vector-pointer to point to it before the loop: **/
1125 /* Create: &(base[init_val+offset]) in the loop preheader. */
1127 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
1129 pe = loop_preheader_edge (loop);
1132 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
1133 gcc_assert (!new_bb);
1136 *initial_address = new_temp;
1138 /* Create: p = (vectype *) initial_base */
1139 vec_stmt = gimple_build_assign (vect_ptr,
1140 fold_convert (vect_ptr_type, new_temp));
1141 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
1142 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
1143 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
1144 gcc_assert (!new_bb);
1147 /** (4) Handle the updating of the vector-pointer inside the loop.
1148 This is needed when ONLY_INIT is false, and also when AT_LOOP
1149 is the inner-loop nested in LOOP (during outer-loop vectorization).
1152 if (only_init && at_loop == loop) /* No update in loop is required. */
1154 /* Copy the points-to information if it exists. */
1155 if (DR_PTR_INFO (dr))
1156 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
1157 vptr = vect_ptr_init;
1161 /* The step of the vector pointer is the Vector Size. */
1162 tree step = TYPE_SIZE_UNIT (vectype);
1163 /* One exception to the above is when the scalar step of the load in
1164 LOOP is zero. In this case the step here is also zero. */
1166 step = size_zero_node;
1168 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
1170 create_iv (vect_ptr_init,
1171 fold_convert (vect_ptr_type, step),
1172 NULL_TREE, loop, &incr_gsi, insert_after,
1173 &indx_before_incr, &indx_after_incr);
1174 incr = gsi_stmt (incr_gsi);
1175 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
1177 /* Copy the points-to information if it exists. */
1178 if (DR_PTR_INFO (dr))
1180 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1181 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1183 merge_alias_info (vect_ptr_init, indx_before_incr);
1184 merge_alias_info (vect_ptr_init, indx_after_incr);
1188 vptr = indx_before_incr;
1191 if (!nested_in_vect_loop || only_init)
1195 /** (5) Handle the updating of the vector-pointer inside the inner-loop
1196 nested in LOOP, if it exists: **/
1198 gcc_assert (nested_in_vect_loop);
1201 standard_iv_increment_position (containing_loop, &incr_gsi,
1203 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), NULL_TREE,
1204 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
1206 incr = gsi_stmt (incr_gsi);
1207 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
1209 /* Copy the points-to information if it exists. */
1210 if (DR_PTR_INFO (dr))
1212 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
1213 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
1215 merge_alias_info (vect_ptr_init, indx_before_incr);
1216 merge_alias_info (vect_ptr_init, indx_after_incr);
1220 return indx_before_incr;
1227 /* Function bump_vector_ptr
1229 Increment a pointer (to a vector type) by vector-size. If requested,
1230 i.e. if PTR-INCR is given, then also connect the new increment stmt
1231 to the existing def-use update-chain of the pointer, by modifying
1232 the PTR_INCR as illustrated below:
1234 The pointer def-use update-chain before this function:
1235 DATAREF_PTR = phi (p_0, p_2)
1237 PTR_INCR: p_2 = DATAREF_PTR + step
1239 The pointer def-use update-chain after this function:
1240 DATAREF_PTR = phi (p_0, p_2)
1242 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
1244 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
1247 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated in the loop.
1249 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
1250 the loop. The increment amount across iterations is expected to be vector_size.
1252 BSI - location where the new update stmt is to be placed.
1253 STMT - the original scalar memory-access stmt that is being vectorized.
1254 BUMP - optional. The offset by which to bump the pointer. If not given,
1255 the offset is assumed to be vector_size.
1257 Output: Return NEW_DATAREF_PTR as illustrated above.
1262 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
1263 gimple stmt, tree bump)
1265 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1266 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1267 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1268 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
1269 tree update = TYPE_SIZE_UNIT (vectype);
1272 use_operand_p use_p;
1273 tree new_dataref_ptr;
1278 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
1279 dataref_ptr, update);
1280 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
1281 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
1282 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
1284 /* Copy the points-to information if it exists. */
1285 if (DR_PTR_INFO (dr))
1286 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
1287 merge_alias_info (new_dataref_ptr, dataref_ptr);
1290 return new_dataref_ptr;
1292 /* Update the vector-pointer's cross-iteration increment. */
1293 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
1295 tree use = USE_FROM_PTR (use_p);
1297 if (use == dataref_ptr)
1298 SET_USE (use_p, new_dataref_ptr);
1300 gcc_assert (tree_int_cst_compare (use, update) == 0);
1303 return new_dataref_ptr;
1307 /* Function vect_create_destination_var.
1309 Create a new temporary of type VECTYPE. */
1312 vect_create_destination_var (tree scalar_dest, tree vectype)
1315 const char *new_name;
1317 enum vect_var_kind kind;
1319 kind = vectype ? vect_simple_var : vect_scalar_var;
1320 type = vectype ? vectype : TREE_TYPE (scalar_dest);
1322 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
1324 new_name = get_name (scalar_dest);
1327 vec_dest = vect_get_new_vect_var (type, kind, new_name);
1328 add_referenced_var (vec_dest);
1334 /* Function vect_init_vector.
1336 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1337 the vector elements of VECTOR_VAR. Place the initialization at BSI if it
1338 is not NULL. Otherwise, place the initialization at the loop preheader.
1339 Return the DEF of INIT_STMT.
1340 It will be used in the vectorization of STMT. */
1343 vect_init_vector (gimple stmt, tree vector_var, tree vector_type,
1344 gimple_stmt_iterator *gsi)
1346 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1354 new_var = vect_get_new_vect_var (vector_type, vect_simple_var, "cst_");
1355 add_referenced_var (new_var);
1356 init_stmt = gimple_build_assign (new_var, vector_var);
1357 new_temp = make_ssa_name (new_var, init_stmt);
1358 gimple_assign_set_lhs (init_stmt, new_temp);
1361 vect_finish_stmt_generation (stmt, init_stmt, gsi);
1364 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1365 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1367 if (nested_in_vect_loop_p (loop, stmt))
1369 pe = loop_preheader_edge (loop);
1370 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1371 gcc_assert (!new_bb);
1374 if (vect_print_dump_info (REPORT_DETAILS))
1376 fprintf (vect_dump, "created new init_stmt: ");
1377 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1380 vec_oprnd = gimple_assign_lhs (init_stmt);
1385 /* For constant and loop invariant defs of SLP_NODE this function returns
1386 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
1387 OP_NUM determines if we gather defs for operand 0 or operand 1 of the scalar
1388 stmts. NUMBER_OF_VECTORS is the number of vector defs to create. */
1391 vect_get_constant_vectors (slp_tree slp_node, VEC(tree,heap) **vec_oprnds,
1392 unsigned int op_num, unsigned int number_of_vectors)
1394 VEC (gimple, heap) *stmts = SLP_TREE_SCALAR_STMTS (slp_node);
1395 gimple stmt = VEC_index (gimple, stmts, 0);
1396 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1397 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1398 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1401 int j, number_of_places_left_in_vector;
1404 int group_size = VEC_length (gimple, stmts);
1405 unsigned int vec_num, i;
1406 int number_of_copies = 1;
1407 bool is_store = false;
1408 VEC (tree, heap) *voprnds = VEC_alloc (tree, heap, number_of_vectors);
1411 if (STMT_VINFO_DATA_REF (stmt_vinfo))
1414 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
1415 created vectors. It is greater than 1 if unrolling is performed.
1417 For example, we have two scalar operands, s1 and s2 (e.g., group of
1418 strided accesses of size two), while NUNITS is four (i.e., four scalars
1419 of this type can be packed in a vector). The output vector will contain
1420 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES is 2).
1423 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
1424 containing the operands.
1426 For example, NUNITS is four as before, and the group size is 8
1427 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
1428 {s5, s6, s7, s8}. */
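/* Putting numbers on the two examples above: with nunits = 4 and
   group_size = 2, least_common_multiple (4, 2) / 2 = 2 copies go into each
   vector; with group_size = 8, least_common_multiple (4, 8) / 8 = 1 copy is
   spread across 8 / 4 = 2 vectors.  */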
1430 number_of_copies = least_common_multiple (nunits, group_size) / group_size;
1432 number_of_places_left_in_vector = nunits;
1434 for (j = 0; j < number_of_copies; j++)
1436 for (i = group_size - 1; VEC_iterate (gimple, stmts, i, stmt); i--)
1439 op = gimple_assign_rhs1 (stmt);
1441 op = gimple_op (stmt, op_num + 1);
1442 if (!CONSTANT_CLASS_P (op))
1445 /* Create 'vect_ = {op0,op1,...,opn}'. */
1446 t = tree_cons (NULL_TREE, op, t);
1448 number_of_places_left_in_vector--;
1450 if (number_of_places_left_in_vector == 0)
1452 number_of_places_left_in_vector = nunits;
1454 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1455 gcc_assert (vector_type);
1457 vec_cst = build_vector (vector_type, t);
1459 vec_cst = build_constructor_from_list (vector_type, t);
1461 VEC_quick_push (tree, voprnds,
1462 vect_init_vector (stmt, vec_cst, vector_type,
1469 /* Since the vectors are created in the reverse order, we should invert them. */
1471 vec_num = VEC_length (tree, voprnds);
1472 for (j = vec_num - 1; j >= 0; j--)
1474 vop = VEC_index (tree, voprnds, j);
1475 VEC_quick_push (tree, *vec_oprnds, vop);
1478 VEC_free (tree, heap, voprnds);
1480 /* In case that VF is greater than the unrolling factor needed for the SLP
1481 group of stmts, NUMBER_OF_VECTORS to be created is greater than
1482 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
1483 to replicate the vectors. */
1484 while (number_of_vectors > VEC_length (tree, *vec_oprnds))
1486 for (i = 0; VEC_iterate (tree, *vec_oprnds, i, vop) && i < vec_num; i++)
1487 VEC_quick_push (tree, *vec_oprnds, vop);
1492 /* Get vectorized definitions from SLP_NODE that contains corresponding
1493 vectorized def-stmts. */
1496 vect_get_slp_vect_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds)
1499 gimple vec_def_stmt;
1502 gcc_assert (SLP_TREE_VEC_STMTS (slp_node));
1505 VEC_iterate (gimple, SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt);
1508 gcc_assert (vec_def_stmt);
1509 vec_oprnd = gimple_get_lhs (vec_def_stmt);
1510 VEC_quick_push (tree, *vec_oprnds, vec_oprnd);
1515 /* Get vectorized definitions for SLP_NODE.
1516 If the scalar definitions are loop invariants or constants, collect them and
1517 call vect_get_constant_vectors() to create vector stmts.
1518 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
1519 must be stored in the LEFT/RIGHT node of SLP_NODE, and we call
1520 vect_get_slp_vect_defs() to retrieve them.
1521 If VEC_OPRNDS1 is NULL, don't get vector defs for the second operand (from
1522 the right node). This is used when the second operand must remain scalar. */
1525 vect_get_slp_defs (slp_tree slp_node, VEC (tree,heap) **vec_oprnds0,
1526 VEC (tree,heap) **vec_oprnds1)
1529 enum tree_code code;
1530 int number_of_vects;
1531 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
1533 first_stmt = VEC_index (gimple, SLP_TREE_SCALAR_STMTS (slp_node), 0);
1534 /* The number of vector defs is determined by the number of vector statements
1535 in the node from which we get those statements. */
1536 if (SLP_TREE_LEFT (slp_node))
1537 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_LEFT (slp_node));
1540 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1541 /* Number of vector stmts was calculated according to LHS in
1542 vect_schedule_slp_instance(), fix it by replacing LHS with RHS, if
1543 necessary. See vect_get_smallest_scalar_type() for details. */
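/* For instance (illustrative): storing chars computed from int operands
   gives lhs_size_unit = 1 and rhs_size_unit = 4, so four times as many
   vector defs are needed for the wider RHS operands.  */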
1544 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
1546 if (rhs_size_unit != lhs_size_unit)
1548 number_of_vects *= rhs_size_unit;
1549 number_of_vects /= lhs_size_unit;
1553 /* Allocate memory for vectorized defs. */
1554 *vec_oprnds0 = VEC_alloc (tree, heap, number_of_vects);
1556 /* SLP_NODE corresponds either to a group of stores or to a group of
1557 unary/binary operations. We don't call this function for loads. */
1558 if (SLP_TREE_LEFT (slp_node))
1559 /* The defs are already vectorized. */
1560 vect_get_slp_vect_defs (SLP_TREE_LEFT (slp_node), vec_oprnds0);
1562 /* Build vectors from scalar defs. */
1563 vect_get_constant_vectors (slp_node, vec_oprnds0, 0, number_of_vects);
1565 if (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_stmt)))
1566 /* Since we don't call this function with loads, this is a group of stores. */
1570 code = gimple_assign_rhs_code (first_stmt);
1571 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS || !vec_oprnds1)
1574 /* The number of vector defs is determined by the number of vector statements
1575 in the node from which we get those statements. */
1576 if (SLP_TREE_RIGHT (slp_node))
1577 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_RIGHT (slp_node));
1579 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
1581 *vec_oprnds1 = VEC_alloc (tree, heap, number_of_vects);
1583 if (SLP_TREE_RIGHT (slp_node))
1584 /* The defs are already vectorized. */
1585 vect_get_slp_vect_defs (SLP_TREE_RIGHT (slp_node), vec_oprnds1);
1587 /* Build vectors from scalar defs. */
1588 vect_get_constant_vectors (slp_node, vec_oprnds1, 1, number_of_vects);
1592 /* Function get_initial_def_for_induction
1595 STMT - a stmt that performs an induction operation in the loop.
1596 IV_PHI - the initial value of the induction variable
1599 Return a vector variable, initialized with the first VF values of
1600 the induction variable. E.g., for an iv with IV_PHI='X' and
1601 evolution S, for a vector of 4 units, we want to return:
1602 [X, X + S, X + 2*S, X + 3*S]. */
1605 get_initial_def_for_induction (gimple iv_phi)
1607 stmt_vec_info stmt_vinfo = vinfo_for_stmt (iv_phi);
1608 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1609 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1610 tree scalar_type = TREE_TYPE (gimple_phi_result (iv_phi));
1613 edge pe = loop_preheader_edge (loop);
1614 struct loop *iv_loop;
1616 tree vec, vec_init, vec_step, t;
1620 gimple init_stmt, induction_phi, new_stmt;
1621 tree induc_def, vec_def, vec_dest;
1622 tree init_expr, step_expr;
1623 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1628 stmt_vec_info phi_info = vinfo_for_stmt (iv_phi);
1629 bool nested_in_vect_loop = false;
1630 gimple_seq stmts = NULL;
1631 imm_use_iterator imm_iter;
1632 use_operand_p use_p;
1636 gimple_stmt_iterator si;
1637 basic_block bb = gimple_bb (iv_phi);
1639 vectype = get_vectype_for_scalar_type (scalar_type);
1640 gcc_assert (vectype);
1641 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1642 ncopies = vf / nunits;
1644 gcc_assert (phi_info);
1645 gcc_assert (ncopies >= 1);
1647 /* Find the first insertion point in the BB. */
1648 si = gsi_after_labels (bb);
1650 if (INTEGRAL_TYPE_P (scalar_type) || POINTER_TYPE_P (scalar_type))
1651 step_expr = build_int_cst (scalar_type, 0);
1653 step_expr = build_real (scalar_type, dconst0);
1655 /* Is phi in an inner-loop, while vectorizing an enclosing outer-loop? */
1656 if (nested_in_vect_loop_p (loop, iv_phi))
1658 nested_in_vect_loop = true;
1659 iv_loop = loop->inner;
1663 gcc_assert (iv_loop == (gimple_bb (iv_phi))->loop_father);
1665 latch_e = loop_latch_edge (iv_loop);
1666 loop_arg = PHI_ARG_DEF_FROM_EDGE (iv_phi, latch_e);
1668 access_fn = analyze_scalar_evolution (iv_loop, PHI_RESULT (iv_phi));
1669 gcc_assert (access_fn);
1670 ok = vect_is_simple_iv_evolution (iv_loop->num, access_fn,
1671 &init_expr, &step_expr);
1673 pe = loop_preheader_edge (iv_loop);
1675 /* Create the vector that holds the initial_value of the induction. */
1676 if (nested_in_vect_loop)
1678 /* iv_loop is nested in the loop to be vectorized. init_expr has already
1679 been created during vectorization of previous stmts; we obtain it from
1680 the STMT_VINFO_VEC_STMT of the defining stmt. */
1681 tree iv_def = PHI_ARG_DEF_FROM_EDGE (iv_phi, loop_preheader_edge (iv_loop));
1682 vec_init = vect_get_vec_def_for_operand (iv_def, iv_phi, NULL);
1686 /* iv_loop is the loop to be vectorized. Create:
1687 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
1688 new_var = vect_get_new_vect_var (scalar_type, vect_scalar_var, "var_");
1689 add_referenced_var (new_var);
1691 new_name = force_gimple_operand (init_expr, &stmts, false, new_var);
1694 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
1695 gcc_assert (!new_bb);
1699 t = tree_cons (NULL_TREE, init_expr, t);
1700 for (i = 1; i < nunits; i++)
1702 /* Create: new_name_i = new_name + step_expr */
1703 enum tree_code code = POINTER_TYPE_P (scalar_type)
1704 ? POINTER_PLUS_EXPR : PLUS_EXPR;
1705 init_stmt = gimple_build_assign_with_ops (code, new_var,
1706 new_name, step_expr);
1707 new_name = make_ssa_name (new_var, init_stmt);
1708 gimple_assign_set_lhs (init_stmt, new_name);
1710 new_bb = gsi_insert_on_edge_immediate (pe, init_stmt);
1711 gcc_assert (!new_bb);
1713 if (vect_print_dump_info (REPORT_DETAILS))
1715 fprintf (vect_dump, "created new init_stmt: ");
1716 print_gimple_stmt (vect_dump, init_stmt, 0, TDF_SLIM);
1718 t = tree_cons (NULL_TREE, new_name, t);
1720 /* Create a vector from [new_name_0, new_name_1, ..., new_name_nunits-1] */
1721 vec = build_constructor_from_list (vectype, nreverse (t));
1722 vec_init = vect_init_vector (iv_phi, vec, vectype, NULL);
1726 /* Create the vector that holds the step of the induction. */
1727 if (nested_in_vect_loop)
1728 /* iv_loop is nested in the loop to be vectorized. Generate:
1729 vec_step = [S, S, S, S] */
1730 new_name = step_expr;
1733 /* iv_loop is the loop to be vectorized. Generate:
1734 vec_step = [VF*S, VF*S, VF*S, VF*S] */
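/* E.g. (illustrative): with step_expr S = 1 and vf = 4 this builds the
   constant {4, 4, 4, 4} - each vector induction value has to advance by a
   whole vector's worth of scalar iterations per vector loop iteration.  */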
1735 expr = build_int_cst (scalar_type, vf);
1736 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1740 for (i = 0; i < nunits; i++)
1741 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1742 gcc_assert (CONSTANT_CLASS_P (new_name));
1743 vec = build_vector (vectype, t);
1744 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1747 /* Create the following def-use cycle:
1752 vec_iv = PHI <vec_init, vec_loop>
1756 vec_loop = vec_iv + vec_step; */
1758 /* Create the induction-phi that defines the induction-operand. */
1759 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
1760 add_referenced_var (vec_dest);
1761 induction_phi = create_phi_node (vec_dest, iv_loop->header);
1762 set_vinfo_for_stmt (induction_phi,
1763 new_stmt_vec_info (induction_phi, loop_vinfo));
1764 induc_def = PHI_RESULT (induction_phi);
1766 /* Create the iv update inside the loop */
1767 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
1768 induc_def, vec_step);
1769 vec_def = make_ssa_name (vec_dest, new_stmt);
1770 gimple_assign_set_lhs (new_stmt, vec_def);
1771 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
1772 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo));
1774 /* Set the arguments of the phi node: */
1775 add_phi_arg (induction_phi, vec_init, pe);
1776 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop));
1779 /* In case that vectorization factor (VF) is bigger than the number
1780 of elements that we can fit in a vectype (nunits), we have to generate
1781 more than one vector stmt - i.e., we need to "unroll" the
1782 vector stmt by a factor VF/nunits. For more details see documentation
1783 in vectorizable_operation. */
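/* As a hypothetical example, vf = 8 with a 4-element vectype gives
   ncopies = 2: the code below emits one extra vector add per extra copy,
   each using a step vector of {nunits*S, ...} = {4*S, 4*S, 4*S, 4*S}.  */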
1787 stmt_vec_info prev_stmt_vinfo;
1788 /* FORNOW. This restriction should be relaxed. */
1789 gcc_assert (!nested_in_vect_loop);
1791 /* Create the vector that holds the step of the induction. */
1792 expr = build_int_cst (scalar_type, nunits);
1793 new_name = fold_build2 (MULT_EXPR, scalar_type, expr, step_expr);
1795 for (i = 0; i < nunits; i++)
1796 t = tree_cons (NULL_TREE, unshare_expr (new_name), t);
1797 gcc_assert (CONSTANT_CLASS_P (new_name));
1798 vec = build_vector (vectype, t);
1799 vec_step = vect_init_vector (iv_phi, vec, vectype, NULL);
1801 vec_def = induc_def;
1802 prev_stmt_vinfo = vinfo_for_stmt (induction_phi);
1803 for (i = 1; i < ncopies; i++)
1805 /* vec_i = vec_prev + vec_step */
1806 new_stmt = gimple_build_assign_with_ops (PLUS_EXPR, vec_dest,
1808 vec_def = make_ssa_name (vec_dest, new_stmt);
1809 gimple_assign_set_lhs (new_stmt, vec_def);
1811 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
1812 set_vinfo_for_stmt (new_stmt,
1813 new_stmt_vec_info (new_stmt, loop_vinfo));
1814 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt;
1815 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
1819 if (nested_in_vect_loop)
1821 /* Find the loop-closed exit-phi of the induction, and record
1822 the final vector of induction results: */
1824 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, loop_arg)
1826 if (!flow_bb_inside_loop_p (iv_loop, gimple_bb (USE_STMT (use_p))))
1828 exit_phi = USE_STMT (use_p);
1834 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
1835 /* FORNOW. Currently not supporting the case that an inner-loop induction
1836 is not used in the outer-loop (i.e. only outside the outer-loop). */
1837 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
1838 && !STMT_VINFO_LIVE_P (stmt_vinfo));
1840 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt;
1841 if (vect_print_dump_info (REPORT_DETAILS))
1843 fprintf (vect_dump, "vector of inductions after inner-loop:");
1844 print_gimple_stmt (vect_dump, new_stmt, 0, TDF_SLIM);
1850 if (vect_print_dump_info (REPORT_DETAILS))
1852 fprintf (vect_dump, "transform induction: created def-use cycle: ");
1853 print_gimple_stmt (vect_dump, induction_phi, 0, TDF_SLIM);
1854 fprintf (vect_dump, "\n");
1855 print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (vec_def), 0, TDF_SLIM);
1858 STMT_VINFO_VEC_STMT (phi_info) = induction_phi;
1863 /* Function vect_get_vec_def_for_operand.
1865 OP is an operand in STMT. This function returns a (vector) def that will be
1866 used in the vectorized stmt for STMT.
1868 In the case that OP is an SSA_NAME which is defined in the loop, then
1869 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1871 In case OP is an invariant or constant, a new stmt that creates a vector def
1872 needs to be introduced. */
1875 vect_get_vec_def_for_operand (tree op, gimple stmt, tree *scalar_def)
1880 stmt_vec_info def_stmt_info = NULL;
1881 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1882 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1883 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
1884 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1890 enum vect_def_type dt;
1894 if (vect_print_dump_info (REPORT_DETAILS))
1896 fprintf (vect_dump, "vect_get_vec_def_for_operand: ");
1897 print_generic_expr (vect_dump, op, TDF_SLIM);
1900 is_simple_use = vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt);
1901 gcc_assert (is_simple_use);
1902 if (vect_print_dump_info (REPORT_DETAILS))
1906 fprintf (vect_dump, "def = ");
1907 print_generic_expr (vect_dump, def, TDF_SLIM);
1911 fprintf (vect_dump, " def_stmt = ");
1912 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
1918 /* Case 1: operand is a constant. */
1919 case vect_constant_def:
1924 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1925 if (vect_print_dump_info (REPORT_DETAILS))
1926 fprintf (vect_dump, "Create vector_cst. nunits = %d", nunits);
1928 for (i = nunits - 1; i >= 0; --i)
1930 t = tree_cons (NULL_TREE, op, t);
1932 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
1933 gcc_assert (vector_type);
1934 vec_cst = build_vector (vector_type, t);
1936 return vect_init_vector (stmt, vec_cst, vector_type, NULL);
1939 /* Case 2: operand is defined outside the loop - loop invariant. */
1940 case vect_invariant_def:
1945 /* Create 'vec_inv = {inv,inv,..,inv}' */
1946 if (vect_print_dump_info (REPORT_DETAILS))
1947 fprintf (vect_dump, "Create vector_inv.");
1949 for (i = nunits - 1; i >= 0; --i)
1951 t = tree_cons (NULL_TREE, def, t);
1954 /* FIXME: use build_constructor directly. */
1955 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def));
1956 gcc_assert (vector_type);
1957 vec_inv = build_constructor_from_list (vector_type, t);
1958 return vect_init_vector (stmt, vec_inv, vector_type, NULL);
1961 /* Case 3: operand is defined inside the loop. */
1965 *scalar_def = NULL/* FIXME tuples: def_stmt*/;
1967 /* Get the def from the vectorized stmt. */
1968 def_stmt_info = vinfo_for_stmt (def_stmt);
1969 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1970 gcc_assert (vec_stmt);
1971 if (gimple_code (vec_stmt) == GIMPLE_PHI)
1972 vec_oprnd = PHI_RESULT (vec_stmt);
1973 else if (is_gimple_call (vec_stmt))
1974 vec_oprnd = gimple_call_lhs (vec_stmt);
1976 vec_oprnd = gimple_assign_lhs (vec_stmt);
1980 /* Case 4: operand is defined by a loop header phi - reduction */
1981 case vect_reduction_def:
1985 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1986 loop = (gimple_bb (def_stmt))->loop_father;
1988 /* Get the def before the loop */
1989 op = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
1990 return get_initial_def_for_reduction (stmt, op, scalar_def);
1993 /* Case 5: operand is defined by loop-header phi - induction. */
1994 case vect_induction_def:
1996 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
1998 /* Get the def from the vectorized stmt. */
1999 def_stmt_info = vinfo_for_stmt (def_stmt);
2000 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2001 gcc_assert (vec_stmt && gimple_code (vec_stmt) == GIMPLE_PHI);
2002 vec_oprnd = PHI_RESULT (vec_stmt);
2012 /* Function vect_get_vec_def_for_stmt_copy
2014 Return a vector-def for an operand. This function is used when the
2015 vectorized stmt to be created (by the caller to this function) is a "copy"
2016 created in case the vectorized result cannot fit in one vector, and several
2017 copies of the vector-stmt are required. In this case the vector-def is
2018 retrieved from the vector stmt recorded in the STMT_VINFO_RELATED_STMT field
2019 of the stmt that defines VEC_OPRND.
2020 DT is the type of the vector def VEC_OPRND.
2023 In case the vectorization factor (VF) is bigger than the number
2024 of elements that can fit in a vectype (nunits), we have to generate
2025 more than one vector stmt to vectorize the scalar stmt. This situation
2026 arises when there are multiple data-types operated upon in the loop; the
2027 smallest data-type determines the VF, and as a result, when vectorizing
2028 stmts operating on wider types we need to create 'VF/nunits' "copies" of the
2029 vector stmt (each computing a vector of 'nunits' results, and together
2030 computing 'VF' results in each iteration). This function is called when
2031 vectorizing such a stmt (e.g. vectorizing S2 in the illustration below, in
2032 which VF=16 and nunits=4, so the number of copies required is 4):
2034 scalar stmt: vectorized into: STMT_VINFO_RELATED_STMT
2036 S1: x = load VS1.0: vx.0 = memref0 VS1.1
2037 VS1.1: vx.1 = memref1 VS1.2
2038 VS1.2: vx.2 = memref2 VS1.3
2039 VS1.3: vx.3 = memref3
2041 S2: z = x + ... VSnew.0: vz0 = vx.0 + ... VSnew.1
2042 VSnew.1: vz1 = vx.1 + ... VSnew.2
2043 VSnew.2: vz2 = vx.2 + ... VSnew.3
2044 VSnew.3: vz3 = vx.3 + ...
2046 The vectorization of S1 is explained in vectorizable_load.
2047 The vectorization of S2:
2048 To create the first vector-stmt out of the 4 copies - VSnew.0 -
2049 the function 'vect_get_vec_def_for_operand' is called to
2050 get the relevant vector-def for each operand of S2. For operand x it
2051 returns the vector-def 'vx.0'.
2053 To create the remaining copies of the vector-stmt (VSnew.j), this
2054 function is called to get the relevant vector-def for each operand. It is
2055 obtained from the respective VS1.j stmt, which is recorded in the
2056 STMT_VINFO_RELATED_STMT field of the stmt that defines VEC_OPRND.
2058 For example, to obtain the vector-def 'vx.1' in order to create the
2059 vector stmt 'VSnew.1', this function is called with VEC_OPRND='vx.0'.
2060 Given 'vx0' we obtain the stmt that defines it ('VS1.0'); from the
2061 STMT_VINFO_RELATED_STMT field of 'VS1.0' we obtain the next copy - 'VS1.1',
2062 and return its def ('vx.1').
2063 Overall, to create the above sequence this function will be called 3 times:
2064 vx.1 = vect_get_vec_def_for_stmt_copy (dt, vx.0);
2065 vx.2 = vect_get_vec_def_for_stmt_copy (dt, vx.1);
2066 vx.3 = vect_get_vec_def_for_stmt_copy (dt, vx.2); */
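/* Illustrative aside (hypothetical helper, not GCC code): a scalar model of
   the VF/nunits "copies" described above, assuming VF = 16 and nunits = 4 as
   in the illustration, so ncopies = 4.  Each vectorized iteration issues four
   4-wide additions, one per copy, mirroring VSnew.0 .. VSnew.3.  */
static void
example_stmt_copies (const int *x, int *z, int n)
{
  int i, copy, lane;

  for (i = 0; i + 16 <= n; i += 16)       /* one vectorized iteration: VF = 16 */
    for (copy = 0; copy < 4; copy++)      /* ncopies = VF / nunits = 4 */
      for (lane = 0; lane < 4; lane++)    /* nunits = 4 lanes per copy */
        z[i + 4 * copy + lane] = x[i + 4 * copy + lane] + 1;
}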
2069 vect_get_vec_def_for_stmt_copy (enum vect_def_type dt, tree vec_oprnd)
2071 gimple vec_stmt_for_operand;
2072 stmt_vec_info def_stmt_info;
2074 /* Do nothing; can reuse same def. */
2075 if (dt == vect_invariant_def || dt == vect_constant_def )
2078 vec_stmt_for_operand = SSA_NAME_DEF_STMT (vec_oprnd);
2079 def_stmt_info = vinfo_for_stmt (vec_stmt_for_operand);
2080 gcc_assert (def_stmt_info);
2081 vec_stmt_for_operand = STMT_VINFO_RELATED_STMT (def_stmt_info);
2082 gcc_assert (vec_stmt_for_operand);
2083 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
2084 if (gimple_code (vec_stmt_for_operand) == GIMPLE_PHI)
2085 vec_oprnd = PHI_RESULT (vec_stmt_for_operand);
2087 vec_oprnd = gimple_get_lhs (vec_stmt_for_operand);
2092 /* Get vectorized definitions for the operands to create a copy of an original
2093 stmt. See vect_get_vec_def_for_stmt_copy() for details. */
2096 vect_get_vec_defs_for_stmt_copy (enum vect_def_type *dt,
2097 VEC(tree,heap) **vec_oprnds0,
2098 VEC(tree,heap) **vec_oprnds1)
2100 tree vec_oprnd = VEC_pop (tree, *vec_oprnds0);
2102 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd);
2103 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
2105 if (vec_oprnds1 && *vec_oprnds1)
2107 vec_oprnd = VEC_pop (tree, *vec_oprnds1);
2108 vec_oprnd = vect_get_vec_def_for_stmt_copy (dt[1], vec_oprnd);
2109 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
2114 /* Get vectorized definitions for OP0 and OP1, or SLP_NODE if it is not NULL. */
2117 vect_get_vec_defs (tree op0, tree op1, gimple stmt,
2118 VEC(tree,heap) **vec_oprnds0, VEC(tree,heap) **vec_oprnds1,
2122 vect_get_slp_defs (slp_node, vec_oprnds0, vec_oprnds1);
2127 *vec_oprnds0 = VEC_alloc (tree, heap, 1);
2128 vec_oprnd = vect_get_vec_def_for_operand (op0, stmt, NULL);
2129 VEC_quick_push (tree, *vec_oprnds0, vec_oprnd);
2133 *vec_oprnds1 = VEC_alloc (tree, heap, 1);
2134 vec_oprnd = vect_get_vec_def_for_operand (op1, stmt, NULL);
2135 VEC_quick_push (tree, *vec_oprnds1, vec_oprnd);
2141 /* Function vect_finish_stmt_generation.
2143 Insert a new stmt. */
2146 vect_finish_stmt_generation (gimple stmt, gimple vec_stmt,
2147 gimple_stmt_iterator *gsi)
2149 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2150 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2152 gcc_assert (stmt == gsi_stmt (*gsi));
2153 gcc_assert (gimple_code (stmt) != GIMPLE_LABEL);
2155 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
2157 set_vinfo_for_stmt (vec_stmt, new_stmt_vec_info (vec_stmt, loop_vinfo));
2159 if (vect_print_dump_info (REPORT_DETAILS))
2161 fprintf (vect_dump, "add new stmt: ");
2162 print_gimple_stmt (vect_dump, vec_stmt, 0, TDF_SLIM);
2165 /* Make sure gsi points to the stmt that is being vectorized. */
2166 gcc_assert (stmt == gsi_stmt (*gsi));
2168 gimple_set_location (vec_stmt, gimple_location (stmt));
2172 /* Function get_initial_def_for_reduction
2175 STMT - a stmt that performs a reduction operation in the loop.
2176 INIT_VAL - the initial value of the reduction variable
2179 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
2180 of the reduction (used for adjusting the epilog - see below).
2181 Return a vector variable, initialized according to the operation that STMT
2182 performs. This vector will be used as the initial value of the
2183 vector of partial results.
2185 Option1 (adjust in epilog): Initialize the vector as follows:
2188 min/max: [init_val,init_val,..,init_val,init_val]
2189 bit and/or: [init_val,init_val,..,init_val,init_val]
2190 and when necessary (e.g. add/mult case) let the caller know
2191 that it needs to adjust the result by init_val.
2193 Option2: Initialize the vector as follows:
2194 add: [0,0,...,0,init_val]
2195 mult: [1,1,...,1,init_val]
2196 min/max: [init_val,init_val,...,init_val]
2197 bit and/or: [init_val,init_val,...,init_val]
2198 and no adjustments are needed.
2200 For example, for the following code:
2206 STMT is 's = s + a[i]', and the reduction variable is 's'.
2207 For a vector of 4 units, we want to return either [0,0,0,init_val],
2208 or [0,0,0,0] and let the caller know that it needs to adjust
2209 the result at the end by 'init_val'.
2211 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
2212 initialization vector is simpler (same element in all entries).
2213 A cost model should help decide between these two schemes. */
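/* Illustrative aside (hypothetical helper, not GCC code): the two
   initialization schemes described above, for a 4-lane add reduction.
   Option 1 starts from {0,0,0,0} and adds init_val back in the epilog;
   Option 2 folds init_val into one lane and needs no adjustment.  The
   sketch assumes N is a multiple of 4.  */
static int
example_reduction_init_schemes (const int *a, int n, int init_val)
{
  int opt1[4] = { 0, 0, 0, 0 };
  int opt2[4] = { 0, 0, 0, 0 };
  int i, lane, sum1 = 0, sum2 = 0;

  opt2[3] = init_val;                     /* Option 2: [0,0,0,init_val] */

  for (i = 0; i + 4 <= n; i += 4)
    for (lane = 0; lane < 4; lane++)
      {
        opt1[lane] += a[i + lane];
        opt2[lane] += a[i + lane];
      }

  for (lane = 0; lane < 4; lane++)
    {
      sum1 += opt1[lane];
      sum2 += opt2[lane];
    }
  sum1 += init_val;                       /* Option 1: adjust in the epilog */

  /* sum1 == sum2: both schemes yield init_val plus the sum of a[0..n-1].  */
  return sum1;
}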
2216 get_initial_def_for_reduction (gimple stmt, tree init_val, tree *adjustment_def)
2218 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2219 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2220 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2221 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2222 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2223 enum tree_code code = gimple_assign_rhs_code (stmt);
2224 tree type = TREE_TYPE (init_val);
2231 bool nested_in_vect_loop = false;
2233 gcc_assert (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type) || SCALAR_FLOAT_TYPE_P (type));
2234 if (nested_in_vect_loop_p (loop, stmt))
2235 nested_in_vect_loop = true;
2237 gcc_assert (loop == (gimple_bb (stmt))->loop_father);
2239 vecdef = vect_get_vec_def_for_operand (init_val, stmt, NULL);
2243 case WIDEN_SUM_EXPR:
2246 if (nested_in_vect_loop)
2247 *adjustment_def = vecdef;
2249 *adjustment_def = init_val;
2250 /* Create a vector of zeros for init_def. */
2251 if (SCALAR_FLOAT_TYPE_P (type))
2252 def_for_init = build_real (type, dconst0);
2254 def_for_init = build_int_cst (type, 0);
2255 for (i = nunits - 1; i >= 0; --i)
2256 t = tree_cons (NULL_TREE, def_for_init, t);
2257 vector_type = get_vectype_for_scalar_type (TREE_TYPE (def_for_init));
2258 gcc_assert (vector_type);
2259 init_def = build_vector (vector_type, t);
2264 *adjustment_def = NULL_TREE;
2276 /* Function vect_create_epilog_for_reduction
2278 Create code at the loop-epilog to finalize the result of a reduction
2281 VECT_DEF is a vector of partial results.
2282 REDUC_CODE is the tree-code for the epilog reduction.
2283 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
2284 number of elements that we can fit in a vectype (nunits). In this case
2285 we have to generate more than one vector stmt - i.e - we need to "unroll"
2286 the vector stmt by a factor VF/nunits. For more details see documentation
2287 in vectorizable_operation.
2288 STMT is the scalar reduction stmt that is being vectorized.
2289 REDUCTION_PHI is the phi-node that carries the reduction computation.
2292 1. Creates the reduction def-use cycle: sets the arguments for
2294 The loop-entry argument is the vectorized initial-value of the reduction.
2295 The loop-latch argument is VECT_DEF - the vector of partial sums.
2296 2. "Reduces" the vector of partial results VECT_DEF into a single result,
2297 by applying the operation specified by REDUC_CODE if available, or by
2298 other means (whole-vector shifts or a scalar loop).
2299 The function also creates a new phi node at the loop exit to preserve
2300 loop-closed form, as illustrated below.
2302 The flow at the entry to this function:
2305 vec_def = phi <null, null> # REDUCTION_PHI
2306 VECT_DEF = vector_stmt # vectorized form of STMT
2307 s_loop = scalar_stmt # (scalar) STMT
2309 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2313 The above is transformed by this function into:
2316 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
2317 VECT_DEF = vector_stmt # vectorized form of STMT
2318 s_loop = scalar_stmt # (scalar) STMT
2320 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
2321 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2322 v_out2 = reduce <v_out1>
2323 s_out3 = extract_field <v_out2, 0>
2324 s_out4 = adjust_result <s_out3>
2330 vect_create_epilog_for_reduction (tree vect_def, gimple stmt,
2332 enum tree_code reduc_code,
2333 gimple reduction_phi)
2335 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2336 stmt_vec_info prev_phi_info;
2338 enum machine_mode mode;
2339 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2340 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2341 basic_block exit_bb;
2344 gimple new_phi = NULL, phi;
2345 gimple_stmt_iterator exit_gsi;
2347 tree new_temp = NULL_TREE;
2349 gimple epilog_stmt = NULL;
2350 tree new_scalar_dest, new_dest;
2352 tree bitsize, bitpos, bytesize;
2353 enum tree_code code = gimple_assign_rhs_code (stmt);
2354 tree adjustment_def;
2355 tree vec_initial_def, def;
2357 imm_use_iterator imm_iter;
2358 use_operand_p use_p;
2359 bool extract_scalar_result = false;
2360 tree reduction_op, expr;
2363 bool nested_in_vect_loop = false;
2364 VEC(gimple,heap) *phis = NULL;
2365 enum vect_def_type dt = vect_unknown_def_type;
2368 if (nested_in_vect_loop_p (loop, stmt))
2371 nested_in_vect_loop = true;
2374 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2376 case GIMPLE_SINGLE_RHS:
2377 gcc_assert (TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt)) == ternary_op);
2378 reduction_op = TREE_OPERAND (gimple_assign_rhs1 (stmt), 2);
2380 case GIMPLE_UNARY_RHS:
2381 reduction_op = gimple_assign_rhs1 (stmt);
2383 case GIMPLE_BINARY_RHS:
2384 reduction_op = gimple_assign_rhs2 (stmt);
2390 vectype = get_vectype_for_scalar_type (TREE_TYPE (reduction_op));
2391 gcc_assert (vectype);
2392 mode = TYPE_MODE (vectype);
2394 /*** 1. Create the reduction def-use cycle ***/
2396 /* For the case of reduction, vect_get_vec_def_for_operand returns
2397 the scalar def before the loop, that defines the initial value
2398 of the reduction variable. */
2399 vec_initial_def = vect_get_vec_def_for_operand (reduction_op, stmt,
2402 phi = reduction_phi;
2404 for (j = 0; j < ncopies; j++)
2406 /* 1.1 set the loop-entry arg of the reduction-phi: */
2407 add_phi_arg (phi, vec_initial_def, loop_preheader_edge (loop));
2409 /* 1.2 set the loop-latch arg for the reduction-phi: */
2411 def = vect_get_vec_def_for_stmt_copy (dt, def);
2412 add_phi_arg (phi, def, loop_latch_edge (loop));
2414 if (vect_print_dump_info (REPORT_DETAILS))
2416 fprintf (vect_dump, "transform reduction: created def-use cycle: ");
2417 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
2418 fprintf (vect_dump, "\n");
2419 print_gimple_stmt (vect_dump, SSA_NAME_DEF_STMT (def), 0, TDF_SLIM);
2422 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi));
2425 /*** 2. Create epilog code
2426 The reduction epilog code operates across the elements of the vector
2427 of partial results computed by the vectorized loop.
2428 The reduction epilog code consists of:
2429 step 1: compute the scalar result in a vector (v_out2)
2430 step 2: extract the scalar result (s_out3) from the vector (v_out2)
2431 step 3: adjust the scalar result (s_out3) if needed.
2433 Step 1 can be accomplished using one of the following three schemes:
2434 (scheme 1) using reduc_code, if available.
2435 (scheme 2) using whole-vector shifts, if available.
2436 (scheme 3) using a scalar loop. In this case steps 1+2 above are
2439 The overall epilog code looks like this:
2441 s_out0 = phi <s_loop> # original EXIT_PHI
2442 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
2443 v_out2 = reduce <v_out1> # step 1
2444 s_out3 = extract_field <v_out2, 0> # step 2
2445 s_out4 = adjust_result <s_out3> # step 3
2447 (step 3 is optional, and steps 1 and 2 may be combined).
2448 Lastly, the uses of s_out0 are replaced by s_out4.
2452 /* 2.1 Create new loop-exit-phi to preserve loop-closed form:
2453 v_out1 = phi <v_loop> */
2455 exit_bb = single_exit (loop)->dest;
2457 prev_phi_info = NULL;
2458 for (j = 0; j < ncopies; j++)
2460 phi = create_phi_node (SSA_NAME_VAR (vect_def), exit_bb);
2461 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo));
2466 def = vect_get_vec_def_for_stmt_copy (dt, def);
2467 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi;
2469 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
2470 prev_phi_info = vinfo_for_stmt (phi);
2472 exit_gsi = gsi_after_labels (exit_bb);
2474 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
2475 (i.e. when reduc_code is not available) and in the final adjustment
2476 code (if needed). Also get the original scalar reduction variable as
2477 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
2478 represents a reduction pattern), the tree-code and scalar-def are
2479 taken from the original stmt that the pattern-stmt (STMT) replaces.
2480 Otherwise (it is a regular reduction) - the tree-code and scalar-def
2481 are taken from STMT. */
2483 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2486 /* Regular reduction */
2491 /* Reduction pattern */
2492 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt);
2493 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
2494 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt);
2496 code = gimple_assign_rhs_code (orig_stmt);
2497 scalar_dest = gimple_assign_lhs (orig_stmt);
2498 scalar_type = TREE_TYPE (scalar_dest);
2499 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
2500 bitsize = TYPE_SIZE (scalar_type);
2501 bytesize = TYPE_SIZE_UNIT (scalar_type);
2504 /* In case this is a reduction in an inner-loop while vectorizing an outer
2505 loop - we don't need to extract a single scalar result at the end of the
2506 inner-loop. The final vector of partial results will be used in the
2507 vectorized outer-loop, or reduced to a scalar result at the end of the
2509 if (nested_in_vect_loop)
2510 goto vect_finalize_reduction;
2513 gcc_assert (ncopies == 1);
2515 /* 2.3 Create the reduction code, using one of the three schemes described
2518 if (reduc_code < NUM_TREE_CODES)
2522 /*** Case 1: Create:
2523 v_out2 = reduc_expr <v_out1> */
2525 if (vect_print_dump_info (REPORT_DETAILS))
2526 fprintf (vect_dump, "Reduce using direct vector reduction.");
2528 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2529 tmp = build1 (reduc_code, vectype, PHI_RESULT (new_phi));
2530 epilog_stmt = gimple_build_assign (vec_dest, tmp);
2531 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2532 gimple_assign_set_lhs (epilog_stmt, new_temp);
2533 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2535 extract_scalar_result = true;
2539 enum tree_code shift_code = 0;
2540 bool have_whole_vector_shift = true;
2542 int element_bitsize = tree_low_cst (bitsize, 1);
2543 int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2546 if (optab_handler (vec_shr_optab, mode)->insn_code != CODE_FOR_nothing)
2547 shift_code = VEC_RSHIFT_EXPR;
2549 have_whole_vector_shift = false;
2551 /* Regardless of whether we have a whole vector shift, if we're
2552 emulating the operation via tree-vect-generic, we don't want
2553 to use it. Only the first round of the reduction is likely
2554 to still be profitable via emulation. */
2555 /* ??? It might be better to emit a reduction tree code here, so that
2556 tree-vect-generic can expand the first round via bit tricks. */
2557 if (!VECTOR_MODE_P (mode))
2558 have_whole_vector_shift = false;
2561 optab optab = optab_for_tree_code (code, vectype, optab_default);
2562 if (optab_handler (optab, mode)->insn_code == CODE_FOR_nothing)
2563 have_whole_vector_shift = false;
2566 if (have_whole_vector_shift)
2568 /*** Case 2: Create:
2569 for (offset = VS/2; offset >= element_size; offset/=2)
2571 Create: va' = vec_shift <va, offset>
2572 Create: va = vop <va, va'>
2575 if (vect_print_dump_info (REPORT_DETAILS))
2576 fprintf (vect_dump, "Reduce using vector shifts");
2578 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2579 new_temp = PHI_RESULT (new_phi);
2581 for (bit_offset = vec_size_in_bits/2;
2582 bit_offset >= element_bitsize;
2585 tree bitpos = size_int (bit_offset);
2586 epilog_stmt = gimple_build_assign_with_ops (shift_code, vec_dest,
2588 new_name = make_ssa_name (vec_dest, epilog_stmt);
2589 gimple_assign_set_lhs (epilog_stmt, new_name);
2590 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2592 epilog_stmt = gimple_build_assign_with_ops (code, vec_dest,
2593 new_name, new_temp);
2594 new_temp = make_ssa_name (vec_dest, epilog_stmt);
2595 gimple_assign_set_lhs (epilog_stmt, new_temp);
2596 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2599 extract_scalar_result = true;
2605 /*** Case 3: Create:
2606 s = extract_field <v_out2, 0>
2607 for (offset = element_size;
2608 offset < vector_size;
2609 offset += element_size)
2611 Create: s' = extract_field <v_out2, offset>
2612 Create: s = op <s, s'>
2615 if (vect_print_dump_info (REPORT_DETAILS))
2616 fprintf (vect_dump, "Reduce using scalar code. ");
2618 vec_temp = PHI_RESULT (new_phi);
2619 vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
2620 rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2622 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
2623 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2624 gimple_assign_set_lhs (epilog_stmt, new_temp);
2625 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2627 for (bit_offset = element_bitsize;
2628 bit_offset < vec_size_in_bits;
2629 bit_offset += element_bitsize)
2631 tree bitpos = bitsize_int (bit_offset);
2632 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
2635 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
2636 new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
2637 gimple_assign_set_lhs (epilog_stmt, new_name);
2638 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2640 epilog_stmt = gimple_build_assign_with_ops (code,
2642 new_name, new_temp);
2643 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2644 gimple_assign_set_lhs (epilog_stmt, new_temp);
2645 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2648 extract_scalar_result = false;
2652 /* 2.4 Extract the final scalar result. Create:
2653 s_out3 = extract_field <v_out2, bitpos> */
2655 if (extract_scalar_result)
2659 gcc_assert (!nested_in_vect_loop);
2660 if (vect_print_dump_info (REPORT_DETAILS))
2661 fprintf (vect_dump, "extract scalar result");
2663 if (BYTES_BIG_ENDIAN)
2664 bitpos = size_binop (MULT_EXPR,
2665 bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
2666 TYPE_SIZE (scalar_type));
2668 bitpos = bitsize_zero_node;
2670 rhs = build3 (BIT_FIELD_REF, scalar_type, new_temp, bitsize, bitpos);
2671 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
2672 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
2673 gimple_assign_set_lhs (epilog_stmt, new_temp);
2674 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2677 vect_finalize_reduction:
2679 /* 2.5 Adjust the final result by the initial value of the reduction
2680 variable. (When such adjustment is not needed, then
2681 'adjustment_def' is zero). For example, if code is PLUS we create:
2682 new_temp = loop_exit_def + adjustment_def */
2686 if (nested_in_vect_loop)
2688 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE);
2689 expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def);
2690 new_dest = vect_create_destination_var (scalar_dest, vectype);
2694 gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE);
2695 expr = build2 (code, scalar_type, new_temp, adjustment_def);
2696 new_dest = vect_create_destination_var (scalar_dest, scalar_type);
2698 epilog_stmt = gimple_build_assign (new_dest, expr);
2699 new_temp = make_ssa_name (new_dest, epilog_stmt);
2700 gimple_assign_set_lhs (epilog_stmt, new_temp);
2701 SSA_NAME_DEF_STMT (new_temp) = epilog_stmt;
2702 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
2706 /* 2.6 Handle the loop-exit phi */
2708 /* Replace uses of s_out0 with uses of s_out3:
2709 Find the loop-closed-use at the loop exit of the original scalar result.
2710 (The reduction result is expected to have two immediate uses - one at the
2711 latch block, and one at the loop exit). */
2712 phis = VEC_alloc (gimple, heap, 10);
2713 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
2715 if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2717 exit_phi = USE_STMT (use_p);
2718 VEC_quick_push (gimple, phis, exit_phi);
2721 /* We expect to have found an exit_phi because of loop-closed-ssa form. */
2722 gcc_assert (!VEC_empty (gimple, phis));
2724 for (i = 0; VEC_iterate (gimple, phis, i, exit_phi); i++)
2726 if (nested_in_vect_loop)
2728 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi);
2730 /* FORNOW. Currently not supporting the case that an inner-loop
2731 reduction is not used in the outer-loop (but only outside the
2733 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
2734 && !STMT_VINFO_LIVE_P (stmt_vinfo));
2736 epilog_stmt = adjustment_def ? epilog_stmt : new_phi;
2737 STMT_VINFO_VEC_STMT (stmt_vinfo) = epilog_stmt;
2738 set_vinfo_for_stmt (epilog_stmt,
2739 new_stmt_vec_info (epilog_stmt, loop_vinfo));
2741 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) =
2742 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
2746 /* Replace the uses: */
2747 orig_name = PHI_RESULT (exit_phi);
2748 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
2749 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
2750 SET_USE (use_p, new_temp);
2752 VEC_free (gimple, heap, phis);
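/* Illustrative aside (hypothetical helper, not GCC code): a scalar picture of
   scheme 2 above (whole-vector shifts).  Each step shifts the vector of
   partial results by half the remaining width and adds it to itself; after
   log2(nunits) steps the full reduction sits in element 0, which is then
   extracted.  Assumes a 4-lane integer vector modelled as an array.  */
static int
example_whole_vector_shift_reduce (int v[4])
{
  int shifted[4];
  int offset, lane;

  for (offset = 2; offset >= 1; offset /= 2)
    {
      for (lane = 0; lane < 4; lane++)              /* va' = vec_shift <va, offset> */
        shifted[lane] = (lane + offset < 4) ? v[lane + offset] : 0;
      for (lane = 0; lane < 4; lane++)              /* va  = vop <va, va'> */
        v[lane] += shifted[lane];
    }
  return v[0];                                      /* s = extract_field <va, 0> */
}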
2756 /* Function vectorizable_reduction.
2758 Check if STMT performs a reduction operation that can be vectorized.
2759 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2760 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2761 Return FALSE if not a vectorizable STMT, TRUE otherwise.
2763 This function also handles reduction idioms (patterns) that have been
2764 recognized in advance during vect_pattern_recog. In this case, STMT may be
2766 X = pattern_expr (arg0, arg1, ..., X)
2767 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
2768 sequence that had been detected and replaced by the pattern-stmt (STMT).
2770 In some cases of reduction patterns, the type of the reduction variable X is
2771 different than the type of the other arguments of STMT.
2772 In such cases, the vectype that is used when transforming STMT into a vector
2773 stmt is different than the vectype that is used to determine the
2774 vectorization factor, because it consists of a different number of elements
2775 than the actual number of elements that are being operated upon in parallel.
2777 For example, consider an accumulation of shorts into an int accumulator.
2778 On some targets it's possible to vectorize this pattern operating on 8
2779 shorts at a time (hence, the vectype for purposes of determining the
2780 vectorization factor should be V8HI); on the other hand, the vectype that
2781 is used to create the vector form is actually V4SI (the type of the result).
2783 Upon entry to this function, STMT_VINFO_VECTYPE records the vectype that
2784 indicates what is the actual level of parallelism (V8HI in the example), so
2785 that the right vectorization factor would be derived. This vectype
2786 corresponds to the type of arguments to the reduction stmt, and should *NOT*
2787 be used to create the vectorized stmt. The right vectype for the vectorized
2788 stmt is obtained from the type of the result X:
2789 get_vectype_for_scalar_type (TREE_TYPE (X))
2791 This means that, contrary to "regular" reductions (or "regular" stmts in
2792 general), the following equation:
2793 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
2794 does *NOT* necessarily hold for reduction patterns. */
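/* Illustrative aside (hypothetical helper, not GCC code): the widen-sum
   situation described above.  Eight shorts are consumed per iteration (the
   V8HI side, which determines the vectorization factor) while the
   accumulators are ints (the V4SI side, which is the vectype used to create
   the vector stmt).  Assumes N is a multiple of 8.  */
static int
example_widen_sum_pattern (const short *a, int n)
{
  int acc[4] = { 0, 0, 0, 0 };        /* int accumulators: V4SI-like           */
  int i, k, sum = 0;

  for (i = 0; i + 8 <= n; i += 8)     /* eight shorts per iteration: V8HI-like */
    for (k = 0; k < 8; k++)
      acc[k % 4] += (int) a[i + k];   /* widen each short, then accumulate     */

  for (k = 0; k < 4; k++)
    sum += acc[k];
  return sum;
}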
2797 vectorizable_reduction (gimple stmt, gimple_stmt_iterator *gsi,
2802 tree loop_vec_def0 = NULL_TREE, loop_vec_def1 = NULL_TREE;
2803 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2804 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2805 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2806 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2807 enum tree_code code, orig_code, epilog_reduc_code = 0;
2808 enum machine_mode vec_mode;
2810 optab optab, reduc_optab;
2811 tree new_temp = NULL_TREE;
2814 enum vect_def_type dt;
2815 gimple new_phi = NULL;
2819 stmt_vec_info orig_stmt_info;
2820 tree expr = NULL_TREE;
2822 int nunits = TYPE_VECTOR_SUBPARTS (vectype);
2823 int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
2825 stmt_vec_info prev_stmt_info, prev_phi_info;
2826 gimple first_phi = NULL;
2827 bool single_defuse_cycle = false;
2829 gimple new_stmt = NULL;
2833 if (nested_in_vect_loop_p (loop, stmt))
2836 gcc_assert (ncopies >= 1);
2838 /* FORNOW: SLP not supported. */
2839 if (STMT_SLP_TYPE (stmt_info))
2842 /* 1. Is vectorizable reduction? */
2844 /* Not supportable if the reduction variable is used in the loop. */
2845 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer)
2848 /* Reductions that are not used even in an enclosing outer-loop,
2849 are expected to be "live" (used out of the loop). */
2850 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop
2851 && !STMT_VINFO_LIVE_P (stmt_info))
2854 /* Make sure it was already recognized as a reduction computation. */
2855 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2858 /* 2. Has this been recognized as a reduction pattern?
2860 Check if STMT represents a pattern that has been recognized
2861 in earlier analysis stages. For stmts that represent a pattern,
2862 the STMT_VINFO_RELATED_STMT field records the last stmt in
2863 the original sequence that constitutes the pattern. */
2865 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2868 orig_stmt_info = vinfo_for_stmt (orig_stmt);
2869 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt);
2870 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
2871 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
2874 /* 3. Check the operands of the operation. The first operands are defined
2875 inside the loop body. The last operand is the reduction variable,
2876 which is defined by the loop-header-phi. */
2878 gcc_assert (is_gimple_assign (stmt));
2881 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
2883 case GIMPLE_SINGLE_RHS:
2884 op_type = TREE_OPERAND_LENGTH (gimple_assign_rhs1 (stmt));
2885 if (op_type == ternary_op)
2887 tree rhs = gimple_assign_rhs1 (stmt);
2888 ops[0] = TREE_OPERAND (rhs, 0);
2889 ops[1] = TREE_OPERAND (rhs, 1);
2890 ops[2] = TREE_OPERAND (rhs, 2);
2891 code = TREE_CODE (rhs);
2897 case GIMPLE_BINARY_RHS:
2898 code = gimple_assign_rhs_code (stmt);
2899 op_type = TREE_CODE_LENGTH (code);
2900 gcc_assert (op_type == binary_op);
2901 ops[0] = gimple_assign_rhs1 (stmt);
2902 ops[1] = gimple_assign_rhs2 (stmt);
2905 case GIMPLE_UNARY_RHS:
2912 scalar_dest = gimple_assign_lhs (stmt);
2913 scalar_type = TREE_TYPE (scalar_dest);
2914 if (!POINTER_TYPE_P (scalar_type) && !INTEGRAL_TYPE_P (scalar_type)
2915 && !SCALAR_FLOAT_TYPE_P (scalar_type))
2918 /* All uses but the last are expected to be defined in the loop.
2919 The last use is the reduction variable. */
2920 for (i = 0; i < op_type-1; i++)
2922 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt,
2924 gcc_assert (is_simple_use);
2925 if (dt != vect_loop_def
2926 && dt != vect_invariant_def
2927 && dt != vect_constant_def
2928 && dt != vect_induction_def)
2932 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &def_stmt, &def, &dt);
2933 gcc_assert (is_simple_use);
2934 gcc_assert (dt == vect_reduction_def);
2935 gcc_assert (gimple_code (def_stmt) == GIMPLE_PHI);
2937 gcc_assert (orig_stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2939 gcc_assert (stmt == vect_is_simple_reduction (loop_vinfo, def_stmt));
2941 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
2944 /* 4. Supportable by target? */
2946 /* 4.1. check support for the operation in the loop */
2947 optab = optab_for_tree_code (code, vectype, optab_default);
2950 if (vect_print_dump_info (REPORT_DETAILS))
2951 fprintf (vect_dump, "no optab.");
2954 vec_mode = TYPE_MODE (vectype);
2955 if (optab_handler (optab, vec_mode)->insn_code == CODE_FOR_nothing)
2957 if (vect_print_dump_info (REPORT_DETAILS))
2958 fprintf (vect_dump, "op not supported by target.");
2959 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD
2960 || LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2961 < vect_min_worthwhile_factor (code))
2963 if (vect_print_dump_info (REPORT_DETAILS))
2964 fprintf (vect_dump, "proceeding using word mode.");
2967 /* Worthwhile without SIMD support? */
2968 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
2969 && LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2970 < vect_min_worthwhile_factor (code))
2972 if (vect_print_dump_info (REPORT_DETAILS))
2973 fprintf (vect_dump, "not worthwhile without SIMD support.");
2977 /* 4.2. Check support for the epilog operation.
2979 If STMT represents a reduction pattern, then the type of the
2980 reduction variable may be different than the type of the rest
2981 of the arguments. For example, consider the case of accumulation
2982 of shorts into an int accumulator. The original code:
2983 S1: int_a = (int) short_a;
2984 orig_stmt-> S2: int_acc = plus <int_a ,int_acc>;
2987 STMT: int_acc = widen_sum <short_a, int_acc>
2990 1. The tree-code that is used to create the vector operation in the
2991 epilog code (that reduces the partial results) is not the
2992 tree-code of STMT, but is rather the tree-code of the original
2993 stmt from the pattern that STMT is replacing. I.e, in the example
2994 above we want to use 'widen_sum' in the loop, but 'plus' in the
2996 2. The type (mode) we use to check available target support
2997 for the vector operation to be created in the *epilog*, is
2998 determined by the type of the reduction variable (in the example
2999 above we'd check this: plus_optab[vect_int_mode]).
3000 However the type (mode) we use to check available target support
3001 for the vector operation to be created *inside the loop*, is
3002 determined by the type of the other arguments to STMT (in the
3003 example we'd check this: widen_sum_optab[vect_short_mode]).
3005 This is contrary to "regular" reductions, in which the types of all
3006 the arguments are the same as the type of the reduction variable.
3007 For "regular" reductions we can therefore use the same vector type
3008 (and also the same tree-code) when generating the epilog code and
3009 when generating the code inside the loop. */
3013 /* This is a reduction pattern: get the vectype from the type of the
3014 reduction variable, and get the tree-code from orig_stmt. */
3015 orig_code = gimple_assign_rhs_code (orig_stmt);
3016 vectype = get_vectype_for_scalar_type (TREE_TYPE (def));
3019 if (vect_print_dump_info (REPORT_DETAILS))
3021 fprintf (vect_dump, "unsupported data-type ");
3022 print_generic_expr (vect_dump, TREE_TYPE (def), TDF_SLIM);
3027 vec_mode = TYPE_MODE (vectype);
3031 /* Regular reduction: the same vectype and tree-code that are used for
3032 the vector code inside the loop can also be used for the epilog code. */
3036 if (!reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
3038 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype, optab_default);
3041 if (vect_print_dump_info (REPORT_DETAILS))
3042 fprintf (vect_dump, "no optab for reduction.");
3043 epilog_reduc_code = NUM_TREE_CODES;
3045 if (optab_handler (reduc_optab, vec_mode)->insn_code == CODE_FOR_nothing)
3047 if (vect_print_dump_info (REPORT_DETAILS))
3048 fprintf (vect_dump, "reduc op not supported by target.");
3049 epilog_reduc_code = NUM_TREE_CODES;
3052 if (!vec_stmt) /* transformation not required. */
3054 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3055 if (!vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies))
3062 if (vect_print_dump_info (REPORT_DETAILS))
3063 fprintf (vect_dump, "transform reduction.");
3065 /* Create the destination vector */
3066 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3068 /* In case the vectorization factor (VF) is bigger than the number
3069 of elements that we can fit in a vectype (nunits), we have to generate
3070 more than one vector stmt - i.e - we need to "unroll" the
3071 vector stmt by a factor VF/nunits. For more details see documentation
3072 in vectorizable_operation. */
3074 /* If the reduction is used in an outer loop we need to generate
3075 VF intermediate results, like so (e.g. for ncopies=2):
3080 (i.e. we generate VF results in 2 registers).
3081 In this case we have a separate def-use cycle for each copy, and therefore
3082 for each copy we get the vector def for the reduction variable from the
3083 respective phi node created for this copy.
3085 Otherwise (the reduction is unused in the loop nest), we can combine
3086 together intermediate results, like so (e.g. for ncopies=2):
3090 (i.e. we generate VF/2 results in a single register).
3091 In this case for each copy we get the vector def for the reduction variable
3092 from the vectorized reduction operation generated in the previous iteration.
3095 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_loop)
3097 single_defuse_cycle = true;
3101 epilog_copies = ncopies;
3103 prev_stmt_info = NULL;
3104 prev_phi_info = NULL;
3105 for (j = 0; j < ncopies; j++)
3107 if (j == 0 || !single_defuse_cycle)
3109 /* Create the reduction-phi that defines the reduction-operand. */
3110 new_phi = create_phi_node (vec_dest, loop->header);
3111 set_vinfo_for_stmt (new_phi, new_stmt_vec_info (new_phi, loop_vinfo));
3117 loop_vec_def0 = vect_get_vec_def_for_operand (ops[0], stmt, NULL);
3118 if (op_type == ternary_op)
3120 loop_vec_def1 = vect_get_vec_def_for_operand (ops[1], stmt, NULL);
3123 /* Get the vector def for the reduction variable from the phi node */
3124 reduc_def = PHI_RESULT (new_phi);
3125 first_phi = new_phi;
3129 enum vect_def_type dt = vect_unknown_def_type; /* Dummy */
3130 loop_vec_def0 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def0);
3131 if (op_type == ternary_op)
3132 loop_vec_def1 = vect_get_vec_def_for_stmt_copy (dt, loop_vec_def1);
3134 if (single_defuse_cycle)
3135 reduc_def = gimple_assign_lhs (new_stmt);
3137 reduc_def = PHI_RESULT (new_phi);
3139 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi;
3142 /* Arguments are ready. Create the new vector stmt. */
3143 if (op_type == binary_op)
3144 expr = build2 (code, vectype, loop_vec_def0, reduc_def);
3146 expr = build3 (code, vectype, loop_vec_def0, loop_vec_def1,
3148 new_stmt = gimple_build_assign (vec_dest, expr);
3149 new_temp = make_ssa_name (vec_dest, new_stmt);
3150 gimple_assign_set_lhs (new_stmt, new_temp);
3151 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3154 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3156 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3157 prev_stmt_info = vinfo_for_stmt (new_stmt);
3158 prev_phi_info = vinfo_for_stmt (new_phi);
3161 /* Finalize the reduction-phi (set its arguments) and create the
3162 epilog reduction code. */
3163 if (!single_defuse_cycle)
3164 new_temp = gimple_assign_lhs (*vec_stmt);
3165 vect_create_epilog_for_reduction (new_temp, stmt, epilog_copies,
3166 epilog_reduc_code, first_phi);
3170 /* Checks if CALL can be vectorized in type VECTYPE. Returns
3171 a function declaration if the target has a vectorized version
3172 of the function, or NULL_TREE if the function cannot be vectorized. */
3175 vectorizable_function (gimple call, tree vectype_out, tree vectype_in)
3177 tree fndecl = gimple_call_fndecl (call);
3178 enum built_in_function code;
3180 /* We only handle functions that do not read or clobber memory -- i.e.
3181 const or novops ones. */
3182 if (!(gimple_call_flags (call) & (ECF_CONST | ECF_NOVOPS)))
3186 || TREE_CODE (fndecl) != FUNCTION_DECL
3187 || !DECL_BUILT_IN (fndecl))
3190 code = DECL_FUNCTION_CODE (fndecl);
3191 return targetm.vectorize.builtin_vectorized_function (code, vectype_out,
3195 /* Function vectorizable_call.
3197 Check if STMT performs a function call that can be vectorized.
3198 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3199 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3200 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3203 vectorizable_call (gimple stmt, gimple_stmt_iterator *gsi, gimple *vec_stmt)
3208 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt), prev_stmt_info;
3210 tree vectype_out, vectype_in;
3213 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3214 tree fndecl, new_temp, def, rhs_type, lhs_type;
3216 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3219 VEC(tree, heap) *vargs = NULL;
3220 enum { NARROW, NONE, WIDEN } modifier;
3223 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3226 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3229 /* FORNOW: SLP not supported. */
3230 if (STMT_SLP_TYPE (stmt_info))
3233 /* Is STMT a vectorizable call? */
3234 if (!is_gimple_call (stmt))
3237 if (TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
3240 /* Process function arguments. */
3241 rhs_type = NULL_TREE;
3242 nargs = gimple_call_num_args (stmt);
3244 /* Bail out if the function has more than two arguments; we
3245 do not have interesting builtin functions to vectorize with
3246 more than two arguments. No arguments is also not good. */
3247 if (nargs == 0 || nargs > 2)
3250 for (i = 0; i < nargs; i++)
3252 op = gimple_call_arg (stmt, i);
3254 /* We can only handle calls with arguments of the same type. */
3256 && rhs_type != TREE_TYPE (op))
3258 if (vect_print_dump_info (REPORT_DETAILS))
3259 fprintf (vect_dump, "argument types differ.");
3262 rhs_type = TREE_TYPE (op);
3264 if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[i]))
3266 if (vect_print_dump_info (REPORT_DETAILS))
3267 fprintf (vect_dump, "use not simple.");
3272 vectype_in = get_vectype_for_scalar_type (rhs_type);
3275 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3277 lhs_type = TREE_TYPE (gimple_call_lhs (stmt));
3278 vectype_out = get_vectype_for_scalar_type (lhs_type);
3281 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3284 if (nunits_in == nunits_out / 2)
3286 else if (nunits_out == nunits_in)
3288 else if (nunits_out == nunits_in / 2)
3293 /* For now, we only vectorize functions if a target specific builtin
3294 is available. TODO -- in some cases, it might be profitable to
3295 insert the calls for pieces of the vector, in order to be able
3296 to vectorize other operations in the loop. */
3297 fndecl = vectorizable_function (stmt, vectype_out, vectype_in);
3298 if (fndecl == NULL_TREE)
3300 if (vect_print_dump_info (REPORT_DETAILS))
3301 fprintf (vect_dump, "function is not vectorizable.");
3306 gcc_assert (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS));
3308 if (modifier == NARROW)
3309 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3311 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3313 /* Sanity check: make sure that at least one copy of the vectorized stmt
3314 needs to be generated. */
3315 gcc_assert (ncopies >= 1);
3317 if (!vec_stmt) /* transformation not required. */
3319 STMT_VINFO_TYPE (stmt_info) = call_vec_info_type;
3320 if (vect_print_dump_info (REPORT_DETAILS))
3321 fprintf (vect_dump, "=== vectorizable_call ===");
3322 vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
3328 if (vect_print_dump_info (REPORT_DETAILS))
3329 fprintf (vect_dump, "transform operation.");
3332 scalar_dest = gimple_call_lhs (stmt);
3333 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3335 prev_stmt_info = NULL;
3339 for (j = 0; j < ncopies; ++j)
3341 /* Build argument list for the vectorized call. */
3343 vargs = VEC_alloc (tree, heap, nargs);
3345 VEC_truncate (tree, vargs, 0);
3347 for (i = 0; i < nargs; i++)
3349 op = gimple_call_arg (stmt, i);
3352 = vect_get_vec_def_for_operand (op, stmt, NULL);
3355 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3357 VEC_quick_push (tree, vargs, vec_oprnd0);
3360 new_stmt = gimple_build_call_vec (fndecl, vargs);
3361 new_temp = make_ssa_name (vec_dest, new_stmt);
3362 gimple_call_set_lhs (new_stmt, new_temp);
3364 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3367 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3369 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3371 prev_stmt_info = vinfo_for_stmt (new_stmt);
3377 for (j = 0; j < ncopies; ++j)
3379 /* Build argument list for the vectorized call. */
3381 vargs = VEC_alloc (tree, heap, nargs * 2);
3383 VEC_truncate (tree, vargs, 0);
3385 for (i = 0; i < nargs; i++)
3387 op = gimple_call_arg (stmt, i);
3391 = vect_get_vec_def_for_operand (op, stmt, NULL);
3393 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3398 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd1);
3400 = vect_get_vec_def_for_stmt_copy (dt[nargs], vec_oprnd0);
3403 VEC_quick_push (tree, vargs, vec_oprnd0);
3404 VEC_quick_push (tree, vargs, vec_oprnd1);
3407 new_stmt = gimple_build_call_vec (fndecl, vargs);
3408 new_temp = make_ssa_name (vec_dest, new_stmt);
3409 gimple_call_set_lhs (new_stmt, new_temp);
3411 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3414 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3416 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3418 prev_stmt_info = vinfo_for_stmt (new_stmt);
3421 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3426 /* No current target implements this case. */
3430 VEC_free (tree, heap, vargs);
3432 /* The call in STMT might prevent it from being removed in dce.
3433 We however cannot remove it here, due to the way the ssa name
3434 it defines is mapped to the new definition. So just replace
3435 rhs of the statement with something harmless. */
3437 type = TREE_TYPE (scalar_dest);
3438 new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
3439 fold_convert (type, integer_zero_node));
3440 set_vinfo_for_stmt (new_stmt, stmt_info);
3441 set_vinfo_for_stmt (stmt, NULL);
3442 STMT_VINFO_STMT (stmt_info) = new_stmt;
3443 gsi_replace (gsi, new_stmt, false);
3444 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3450 /* Function vect_gen_widened_results_half
3452 Create a vector stmt whose code, operand type, and result
3453 variable are CODE, OP_TYPE, and VEC_DEST, and whose arguments are
3454 VEC_OPRND0 and VEC_OPRND1. The new vector stmt is to be inserted at BSI.
3455 In the case that CODE is a CALL_EXPR, this means that a call to DECL
3456 needs to be created (DECL is a function-decl of a target-builtin).
3457 STMT is the original scalar stmt that we are vectorizing. */
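/* Illustrative aside (hypothetical helper, not GCC code): widening a vector
   of 8 narrow elements yields two vectors of 4 wide elements; the function
   documented above emits one such half at a time (first half, then second
   half).  A scalar sketch of the two halves:  */
static void
example_widened_halves (const short in[8], int lo_half[4], int hi_half[4])
{
  int lane;
  for (lane = 0; lane < 4; lane++)
    {
      lo_half[lane] = (int) in[lane];       /* first half of the widened result  */
      hi_half[lane] = (int) in[lane + 4];   /* second half of the widened result */
    }
}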
3460 vect_gen_widened_results_half (enum tree_code code,
3462 tree vec_oprnd0, tree vec_oprnd1, int op_type,
3463 tree vec_dest, gimple_stmt_iterator *gsi,
3471 /* Generate half of the widened result: */
3472 if (code == CALL_EXPR)
3474 /* Target specific support */
3475 if (op_type == binary_op)
3476 new_stmt = gimple_build_call (decl, 2, vec_oprnd0, vec_oprnd1);
3478 new_stmt = gimple_build_call (decl, 1, vec_oprnd0);
3479 new_temp = make_ssa_name (vec_dest, new_stmt);
3480 gimple_call_set_lhs (new_stmt, new_temp);
3484 /* Generic support */
3485 gcc_assert (op_type == TREE_CODE_LENGTH (code));
3486 if (op_type != binary_op)
3488 new_stmt = gimple_build_assign_with_ops (code, vec_dest, vec_oprnd0,
3490 new_temp = make_ssa_name (vec_dest, new_stmt);
3491 gimple_assign_set_lhs (new_stmt, new_temp);
3493 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3495 if (code == CALL_EXPR)
3497 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter, SSA_OP_ALL_VIRTUALS)
3499 if (TREE_CODE (sym) == SSA_NAME)
3500 sym = SSA_NAME_VAR (sym);
3501 mark_sym_for_renaming (sym);
3509 /* Check if STMT performs a conversion operation, that can be vectorized.
3510 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3511 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3512 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
3515 vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
3516 gimple *vec_stmt, slp_tree slp_node)
3521 tree vec_oprnd0 = NULL_TREE, vec_oprnd1 = NULL_TREE;
3522 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3523 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3524 enum tree_code code, code1 = ERROR_MARK, code2 = ERROR_MARK;
3525 tree decl1 = NULL_TREE, decl2 = NULL_TREE;
3529 enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
3530 gimple new_stmt = NULL;
3531 stmt_vec_info prev_stmt_info;
3534 tree vectype_out, vectype_in;
3537 tree rhs_type, lhs_type;
3539 enum { NARROW, NONE, WIDEN } modifier;
3541 VEC(tree,heap) *vec_oprnds0 = NULL;
3544 VEC(tree,heap) *dummy = NULL;
3547 /* Is STMT a vectorizable conversion? */
3549 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3552 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
3555 if (!is_gimple_assign (stmt))
3558 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
3561 code = gimple_assign_rhs_code (stmt);
3562 if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR)
3565 /* Check types of lhs and rhs. */
3566 op0 = gimple_assign_rhs1 (stmt);
3567 rhs_type = TREE_TYPE (op0);
3568 vectype_in = get_vectype_for_scalar_type (rhs_type);
3571 nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in);
3573 scalar_dest = gimple_assign_lhs (stmt);
3574 lhs_type = TREE_TYPE (scalar_dest);
3575 vectype_out = get_vectype_for_scalar_type (lhs_type);
3578 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
3581 if (nunits_in == nunits_out / 2)
3583 else if (nunits_out == nunits_in)
3585 else if (nunits_out == nunits_in / 2)
3590 if (modifier == NONE)
3591 gcc_assert (STMT_VINFO_VECTYPE (stmt_info) == vectype_out);
3593 /* Bail out if the types are both integral or non-integral. */
3594 if ((INTEGRAL_TYPE_P (rhs_type) && INTEGRAL_TYPE_P (lhs_type))
3595 || (!INTEGRAL_TYPE_P (rhs_type) && !INTEGRAL_TYPE_P (lhs_type)))
3598 integral_type = INTEGRAL_TYPE_P (rhs_type) ? vectype_in : vectype_out;
3600 if (modifier == NARROW)
3601 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_out;
3603 ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits_in;
3605 /* FORNOW: SLP with multiple types is not supported. The SLP analysis verifies
3606 this, so we can safely override NCOPIES with 1 here. */
3610 /* Sanity check: make sure that at least one copy of the vectorized stmt
3611 needs to be generated. */
3612 gcc_assert (ncopies >= 1);
3614 /* Check the operands of the operation. */
3615 if (!vect_is_simple_use (op0, loop_vinfo, &def_stmt, &def, &dt[0]))
3617 if (vect_print_dump_info (REPORT_DETAILS))
3618 fprintf (vect_dump, "use not simple.");
3622 /* Supportable by target? */
3623 if ((modifier == NONE
3624 && !targetm.vectorize.builtin_conversion (code, integral_type))
3625 || (modifier == WIDEN
3626 && !supportable_widening_operation (code, stmt, vectype_in,
3629 &dummy_int, &dummy))
3630 || (modifier == NARROW
3631 && !supportable_narrowing_operation (code, stmt, vectype_in,
3632 &code1, &dummy_int, &dummy)))
3634 if (vect_print_dump_info (REPORT_DETAILS))
3635 fprintf (vect_dump, "conversion not supported by target.");
3639 if (modifier != NONE)
3641 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3642 /* FORNOW: SLP not supported. */
3643 if (STMT_SLP_TYPE (stmt_info))
3647 if (!vec_stmt) /* transformation not required. */
3649 STMT_VINFO_TYPE (stmt_info) = type_conversion_vec_info_type;
3654 if (vect_print_dump_info (REPORT_DETAILS))
3655 fprintf (vect_dump, "transform conversion.");
3658 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
3660 if (modifier == NONE && !slp_node)
3661 vec_oprnds0 = VEC_alloc (tree, heap, 1);
3663 prev_stmt_info = NULL;
3667 for (j = 0; j < ncopies; j++)
3673 vect_get_vec_defs (op0, NULL, stmt, &vec_oprnds0, NULL, slp_node);
3675 vect_get_vec_defs_for_stmt_copy (dt, &vec_oprnds0, NULL);
3678 targetm.vectorize.builtin_conversion (code, integral_type);
3679 for (i = 0; VEC_iterate (tree, vec_oprnds0, i, vop0); i++)
3681 /* Arguments are ready. Create the new vector stmt. */
3682 new_stmt = gimple_build_call (builtin_decl, 1, vop0);
3683 new_temp = make_ssa_name (vec_dest, new_stmt);
3684 gimple_call_set_lhs (new_stmt, new_temp);
3685 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3686 FOR_EACH_SSA_TREE_OPERAND (sym, new_stmt, iter,
3687 SSA_OP_ALL_VIRTUALS)
3689 if (TREE_CODE (sym) == SSA_NAME)
3690 sym = SSA_NAME_VAR (sym);
3691 mark_sym_for_renaming (sym);
3694 VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), new_stmt);
3698 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt;
3700 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3701 prev_stmt_info = vinfo_for_stmt (new_stmt);
3706 /* In case the vectorization factor (VF) is bigger than the number
3707 of elements that we can fit in a vectype (nunits), we have to
3708 generate more than one vector stmt - i.e - we need to "unroll"
3709 the vector stmt by a factor VF/nunits. */
3710 for (j = 0; j < ncopies; j++)
3713 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3715 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3717 STMT_VINFO_VECTYPE (stmt_info) = vectype_in;
3719 /* Generate first half of the widened result: */
3721 = vect_gen_widened_results_half (code1, decl1,
3722 vec_oprnd0, vec_oprnd1,
3723 unary_op, vec_dest, gsi, stmt);
3725 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3727 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3728 prev_stmt_info = vinfo_for_stmt (new_stmt);
3730 /* Generate second half of the widened result: */
3732 = vect_gen_widened_results_half (code2, decl2,
3733 vec_oprnd0, vec_oprnd1,
3734 unary_op, vec_dest, gsi, stmt);
3735 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3736 prev_stmt_info = vinfo_for_stmt (new_stmt);
3741 /* In case the vectorization factor (VF) is bigger than the number
3742 of elements that we can fit in a vectype (nunits), we have to
3743 generate more than one vector stmt - i.e - we need to "unroll"
3744 the vector stmt by a factor VF/nunits. */
3745 for (j = 0; j < ncopies; j++)
3750 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt, NULL);
3751 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3755 vec_oprnd0 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd1);
3756 vec_oprnd1 = vect_get_vec_def_for_stmt_copy (dt[0], vec_oprnd0);
3759 /* Arguments are ready. Create the new vector stmt. */
3760 expr = build2 (code1, vectype_out, vec_oprnd0, vec_oprnd1);
3761 new_stmt = gimple_build_assign_with_ops (code1, vec_dest, vec_oprnd0,
3763 new_temp = make_ssa_name (vec_dest, new_stmt);
3764 gimple_assign_set_lhs (new_stmt, new_temp);
3765 vect_finish_stmt_generation (stmt, new_stmt, gsi);
3768 STMT_VINFO_VEC_STMT (stmt_info) = new_stmt;
3770 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt;
3772 prev_stmt_info = vinfo_for_stmt (new_stmt);
3775 *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
3779 VEC_free (tree, heap, vec_oprnds0);
3785 /* Function vectorizable_assignment.
3787 Check if STMT performs an assignment (copy) that can be vectorized.
3788 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
3789 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
3790 Return FALSE if not a vectorizable STMT, TRUE otherwise. */

bool
vectorizable_assignment (gimple stmt, gimple_stmt_iterator *gsi,
                         gimple *vec_stmt, slp_tree slp_node)
{
  tree vec_dest, scalar_dest, op;
  tree new_temp, def, vop;
  gimple def_stmt;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  enum vect_def_type dt[2] = {vect_unknown_def_type, vect_unknown_def_type};
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies, i;
  VEC(tree,heap) *vec_oprnds = NULL;

  /* Multiple types in SLP are handled by creating the appropriate number of
     vectorized stmts for each SLP node.  Hence, NCOPIES is always 1 in
     case of SLP.  */
  if (slp_node)
    ncopies = 1;
  else
    ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  gcc_assert (ncopies >= 1);
  if (ncopies > 1)
    return false; /* FORNOW */
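
  /* Illustration, not from the original sources: with VF == 8 and a V4SI
     vectype (nunits == 4), the loop-based path would need ncopies == 2 and
     therefore gives up just above (FORNOW), whereas the SLP path derives the
     number of vector stmts from the SLP node and keeps ncopies at 1.  */
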
  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;
  if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_loop_def)
    return false;

  /* Is vectorizable assignment?  */
  if (!is_gimple_assign (stmt))
    return false;

  scalar_dest = gimple_assign_lhs (stmt);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  if (gimple_assign_single_p (stmt)
      || gimple_assign_rhs_code (stmt) == PAREN_EXPR)
    op = gimple_assign_rhs1 (stmt);
  else
    return false;

  if (!vect_is_simple_use (op, loop_vinfo, &def_stmt, &def, &dt[0]))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "use not simple.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_assignment ===");
      vect_model_simple_cost (stmt_info, ncopies, dt, NULL);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform assignment.");

  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  vect_get_vec_defs (op, NULL, stmt, &vec_oprnds, NULL, slp_node);

  /* Arguments are ready.  Create the new vector stmt.  */
  for (i = 0; VEC_iterate (tree, vec_oprnds, i, vop); i++)
    {
      *vec_stmt = gimple_build_assign (vec_dest, vop);
      new_temp = make_ssa_name (vec_dest, *vec_stmt);
      gimple_assign_set_lhs (*vec_stmt, new_temp);
      vect_finish_stmt_generation (stmt, *vec_stmt, gsi);
      STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt;
      if (slp_node)
        VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node), *vec_stmt);
    }

  VEC_free (tree, heap, vec_oprnds);
  return true;
}

/* Function vect_min_worthwhile_factor.

   For a loop where we could vectorize the operation indicated by CODE,
   return the minimum vectorization factor that makes it worthwhile
   to use generic vectors.  */
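
/* Illustration, not from the original sources: this threshold matters when
   the target has no vector mode for the chosen vectype, so the "vector"
   operation would be emulated piecewise in word-mode arithmetic; such
   emulation only pays off once enough scalar iterations are folded into
   each emulated operation.  */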

static int
vect_min_worthwhile_factor (enum tree_code code)

/* Function vectorizable_induction

   Check if PHI performs an induction computation that can be vectorized.
   If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
   phi to replace it, put it in VEC_STMT, and add it to the same basic block.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */
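
/* Illustration, not from the original sources: for a scalar IV starting at 0
   with step 1 and a V4SI vectype, get_initial_def_for_induction builds the
   initial vector def {0, 1, 2, 3} together with a step vector {4, 4, 4, 4}
   that is added on each vectorized iteration; the scalar PHI is then simply
   replaced by that vector definition.  */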

bool
vectorizable_induction (gimple phi, gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
                        gimple *vec_stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (phi);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int nunits = TYPE_VECTOR_SUBPARTS (vectype);
  int ncopies = LOOP_VINFO_VECT_FACTOR (loop_vinfo) / nunits;
  tree vec_def;

  gcc_assert (ncopies >= 1);
  /* FORNOW. This restriction should be relaxed.  */
  if (nested_in_vect_loop_p (loop, phi) && ncopies > 1)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "multiple types in nested loop.");
      return false;
    }

  if (!STMT_VINFO_RELEVANT_P (stmt_info))
    return false;

  /* FORNOW: SLP not supported.  */
  if (STMT_SLP_TYPE (stmt_info))
    return false;

  gcc_assert (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def);

  if (gimple_code (phi) != GIMPLE_PHI)
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "=== vectorizable_induction ===");
      vect_model_induction_cost (stmt_info, ncopies);
      return true;
    }

  /** Transform.  **/
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "transform induction phi.");

  vec_def = get_initial_def_for_induction (phi);
  *vec_stmt = SSA_NAME_DEF_STMT (vec_def);
  return true;
}

/* Function vectorizable_operation.